#!/usr/bin/python
#
# $Id: imap_sort.py,v 1.3 2004/11/02 22:26:31 connolly Exp $
#
# Network Working Group                                        M. Crispin
# Request for Comments: 2060                     University of Washington
# Obsoletes: 1730                                           December 1996
# Category: Standards Track
#
#            INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
# http://www.ietf.org/rfc/rfc2060.txt
# http://www.faqs.org/rfcs/rfc2060.html

# Why UID? why not just use Message-ID?
# "registered" charset: registered as of when? as of the writing
# of RFC2060? as of the server implementation?
#
# concurrent access semantics: recent. goofy?

# Python Library Reference
#
#                             Guido van Rossum
#
#                   Corporation for National Research Initiatives (CNRI) 
#                   1895 Preston White Drive, Reston, Va 20191, USA 
#                 E-mail: guido@CNRI.Reston.Va.US, guido@python.org 
#
#                                  February 19, 1999
#                                    Release 1.5.2
#http://www.python.org/doc/lib/module-imaplib.html

#@@bug: * SEARCH
# Untagged response sometimes has no data, no separating space

import sys

import getpass, string

#@@patched imaplib
import imaplib2
imaplib = imaplib2

# Debug level for this script.  main() overwrites it (and imaplib.Debug)
# with the integer given to the -D command-line option.
Debug = 0

def toOrCC(str):
    """Return an IMAP SEARCH key matching mail sent To or Cc'd to `str`.

    The value is embedded twice as an IMAP quoted string, so any
    backslash or double quote in it is backslash-escaped first; without
    that, such a character would end the quoted string early and corrupt
    the search command.

    The parameter keeps its historical name `str` (it shadows the
    builtin inside this function only).
    """
    text = "%s" % (str,)
    quoted = text.replace('\\', '\\\\').replace('"', '\\"')
    return 'OR TO "%s" CC "%s"' % (quoted, quoted)

# (IMAP search criteria, destination folder) pairs; main() hands this
# table to doQueries() when the -q option is given.
Queries = (
    (toOrCC('w3c-xml-cg'), 'nsmail/02-work/01-xml-cg'),
    (toOrCC('w3c-xml-schema-wg@'), 'nsmail/02-work/04-xml-schema'),
)


def progress(*args):
    """Write the arguments to stderr as one line.

    Each argument is converted with str() and followed by a single
    space; the line is then terminated with a newline.
    """
    emit = sys.stderr.write
    for item in args:
        emit(str(item) + " ")
    emit("\n")

def main(argv):
    """Command-line entry point: parse options, log in, run the jobs.

    Options are scanned left to right, starting at argv[1]:

      -D level               debug level, stored in Debug and imaplib.Debug
      -q folder              run the canned Queries against `folder`
      -s host                IMAP server host (default 'localhost')
      -p port                IMAP server port (default 143)
      --dups folder ...      remove duplicates from every remaining argument
      --mergeFlags src dest  copy message flags from `src` to `dest`

    Arguments that match none of these are skipped.  The login uses the
    current user name and a password read with getpass.

    Raises SystemExit (status 2) when an option is missing its operand
    or a numeric operand is not an integer.
    """
    global Debug

    dedup = None
    qf = None
    mergeSrc = None
    mergeDest = None
    srvHost = 'localhost'
    srvPort = 143

    def operands(pos, count):
        # The `count` arguments following the option at argv[pos]; a
        # short command line is a usage error, not an IndexError.
        found = argv[pos + 1:pos + 1 + count]
        if len(found) < count:
            progress("option %s needs %d argument(s)" % (argv[pos], count))
            sys.exit(2)
        return found

    def number(pos, text):
        # Integer operand of the option at argv[pos].
        try:
            return int(text)
        except ValueError:
            progress("option %s needs an integer, not %r" % (argv[pos], text))
            sys.exit(2)

    i = 1
    while i < len(argv):
        opt = argv[i]
        if opt == '-D':
            d = number(i, operands(i, 1)[0])
            imaplib.Debug = d
            Debug = d
            i = i + 1

        elif opt == '-q':
            qf = operands(i, 1)[0]
            i = i + 1

        elif opt == '-s':
            srvHost = operands(i, 1)[0]
            i = i + 1

        elif opt == '-p':
            srvPort = number(i, operands(i, 1)[0])
            i = i + 1

        elif opt == '--dups':
            # everything after --dups is a folder name
            dedup = argv[i + 1:]
            i = len(argv)

        elif opt == '--mergeFlags':
            mergeSrc, mergeDest = operands(i, 2)
            i = i + 2
        i = i + 1

    progress("connecting to", (srvHost, srvPort))
    M = imaplib.IMAP4(srvHost, srvPort)
    progress("logging in")
    M.LOGIN(getpass.getuser(), getpass.getpass())

    if dedup:
        for folder in dedup:
            progress("deduping folder %s" % (folder,))
            removeDups(M, folder)

    if qf:
        doQueries(M, qf, Queries)

    if mergeSrc:
        mergeFlags(M, mergeSrc, mergeDest)

    M.LOGOUT()

def removeDups(mail, box):
    """Delete messages in `box` whose ENVELOPE repeats another message's.

    mail -- a logged-in IMAP connection
    box  -- name of the folder to select and clean

    The raw ENVELOPE responses of all messages are sorted so that
    identical ones become neighbours.  Every message whose raw envelope
    equals its predecessor's is flagged \\Deleted, and the folder is
    expunged at the end.  Flagging is done in batches so the message set
    sent with one STORE command stays short.
    """
    def flagDeleted(msgSet):
        # Mark one comma-separated set of message numbers as deleted.
        progress("deleting dups:", msgSet)
        mail.store(msgSet, '+FLAGS.SILENT', '(\\Deleted)')

    progress("selecting", box)
    mail.SELECT(box)

    progress("fetching envelopes in", box)
    (typ, data) = mail.fetch('1:*', 'ENVELOPE')

    envelopes = []
    for d in data:
        if not d:
            # an empty folder can yield an empty/None placeholder entry
            continue
        envelopes.append((d[0], d[1:]))

    progress("sorting envelopes in", box)
    # the sort is stable, so among equal envelopes the fetch order is kept
    envelopes.sort(key=lambda entry: entry[1])

    progress("looking for dups in", box)
    orig = (None, None)
    dups = ""
    for e in envelopes:
        if e[1] == orig[1]:
            po = parseList(orig[1])[1]
            pe = parseList(e[1])[1]
            progress("==orig: %-5s %s %20s" % (orig[0], po[0], po[1]))
            progress("== dup: %-5s %s %20s" % (e[0], pe[0], pe[1]))
            if dups:
                dups = dups + "," + e[0]
            else:
                dups = e[0]

            # flush before the message set grows too long for one command
            if len(dups) > 1500:
                flagDeleted(dups)
                dups = ""

        orig = e

    if dups:
        flagDeleted(dups)
    progress("expunging:", box)
    mail.expunge()

def doQueries(mail, box, queries):
    """Run each search in `queries` against `box` and report the matches.

    mail    -- a logged-in IMAP connection
    box     -- folder to select
    queries -- sequence of (search criteria, destination folder) pairs

    This is currently a dry run: matching envelopes are fetched, parsed
    and reported through progress(), but the copy/delete step that would
    move them to the destination is commented out.
    """
    mail.SELECT(box)
    for criteria, dest in queries:
        progress("query:", criteria)
        typ, data = mail.search(None, criteria)

        found = data[0].split()
        if not found:
            continue

        msgSet = ",".join(found)
        (typ, responses) = mail.fetch(msgSet, 'ENVELOPE')
        for response in responses:
            num = response[0]
            parsed = parseList(response[1:])
            progress("query matched:", parsed)
            assert parsed[0] is ENVELOPE, parsed
            # unpacking doubles as a check that the envelope has 10 fields
            (env_date, env_subject,
             env_from, env_sender, env_reply_to,
             env_to, env_cc, env_bcc,
             env_in_reply_to, env_message_id) = parsed[1]

        #print "@@copying [", hits, "] to [", dest, "]"
        #mail.copy(msgSet, dest)
        #print "@@deleting [", msgSet, "]"
        #mail.store(msgSet, '+FLAGS', '(\Deleted)')
        #mail.expunge()

#MH emulation

class MH:
    """A few MH-style mail commands (folder, scan, pick, refile) over IMAP.

    The instance remembers the message numbers chosen by the last pick()
    in self._selection; scan() and refile() act on that selection.
    """

    # Addresses treated as "me" by isme().
    # @@HACK! hard-coded; override on a subclass or instance if needed.
    myAddresses = ('connolly@w3.org',
                   'dan@w3.org',
                   'connolly@convex.com',
                   'connolly@hal.com',
                   'connolly@atrium.com')

    def __init__(self, imapconn = None):
        """Wrap `imapconn`; with no connection given, open one and log in
        as the current user with a password read from the terminal."""
        if imapconn is None:
            imapconn = imaplib2.IMAP4()
            imapconn.LOGIN(getpass.getuser(), getpass.getpass())
        self._imap = imapconn
        self._selection = []

    def folder(self, f):
        """Select folder `f` on the server."""
        self._imap.select(f)

    def scan(self):
        """Print one summary line per message in the current selection
        (or in the whole folder when nothing is selected).

        Each line shows the message number, a correspondent and the
        subject.  The correspondent is the first From address, unless
        that address is one of mine and the message has a To address; in
        that case the first To address is shown, followed by '*'.
        """
        if self._selection:
            msgSet = ",".join(self._selection)
        else:
            msgSet = "1:*"

        (typ, hits) = self._imap.fetch(msgSet, 'ENVELOPE')
        for hit in hits:
            if not hit:
                # an empty folder can yield an empty/None placeholder entry
                continue
            num = int(hit[0])
            l = parseList(hit[1:])
            assert l[0] is ENVELOPE, l
            env = l[1]
            env_subject, env_from, env_to = env[1], env[2], env[5]

            who = ''
            star = ''
            if env_from:
                who = env_from[0]
            if who and self.isme(who) and env_to:
                who = env_to[0]
                star = '*'
            if who:
                # prefer the personal name, else mailbox@host; a message
                # without any From address keeps the empty label
                who = who[0] or ("%s@%s" % (who[2] or '', who[3] or ''))
            sys.stdout.write("%-4d: %15.15s%1.1s %30.30s\n"
                             % (num, who, star, env_subject))

    def isme(self, addr):
        """Tell whether the parsed address `addr`, a
        (name, route, mailbox, host) sequence, is one of myAddresses."""
        s, p, box, dom = addr
        if not (box and dom):
            # NIL mailbox or host: cannot be one of my addresses
            return False
        return (box + '@' + dom) in self.myAddresses

    def pick(self, q):
        """Search the selected folder, remember the hits, and return them.

        query syntax is per [IMAP] e.g.
          OR TO "connolly" CC "connolly"

        The hits are returned as a list of message-number strings.
        """
        typ, data = self._imap.search(None, q)

        # a search without matches may come back with no data at all
        hits = (data and data[0] or '').split() # hmm... convert to numbers?
        self._selection = hits
        return hits

    def refile(self, dest):
        """Move the current selection to folder `dest`.

        The messages are copied, then flagged \\Deleted and expunged.
        The originals are only deleted when the copy reported OK, so a
        failed copy cannot lose mail.  The selection is cleared
        afterwards because EXPUNGE renumbers the remaining messages.

        Raises RuntimeError when the copy does not succeed.
        """
        if not self._selection:
            return
        msgSet = ",".join(self._selection)
        (typ, data) = self._imap.copy(msgSet, dest)
        if typ != 'OK':
            raise RuntimeError("copy of %s to %s failed: %s"
                               % (msgSet, dest, data))
        self._imap.store(msgSet, '+FLAGS.SILENT', '(\\Deleted)')
        self._imap.expunge()
        self._selection = []


def tblout(*args):
    """Write the arguments to stdout as one tab-separated line.

    Each argument is converted with str(); the line ends with a newline.
    """
    cells = [str(value) for value in args]
    sys.stdout.write("\t".join(cells) + "\n")

class MHPlus(MH):
    """MH plus a per-address dump of the scanned messages."""

    def scanwho(self, thunk = None):
        """Report every address combination of the scanned messages.

        The messages are the current selection, or the whole folder when
        nothing is selected.  For each message, `thunk` is called once
        per combination of one From, Sender, Reply-To, To, Cc and Bcc
        address, as

            thunk(num, date, message_id, from, sender, reply_to, to, cc, bcc)

        A header with no addresses contributes NIL to the combinations.
        `thunk` defaults to tblout, which prints a tab-separated line.
        """
        if thunk is None:
            thunk = tblout

        if self._selection:
            msgSet = ",".join(self._selection)
        else:
            msgSet = "1:*"

        (typ, hits) = self._imap.fetch(msgSet, 'ENVELOPE')
        for hit in hits:
            num = int(hit[0])
            l = parseList(hit[1:])
            assert l[0] is ENVELOPE, l
            (env_date, env_subject,
             env_from, env_sender, env_reply_to,
             env_to, env_cc, env_bcc,
             env_in_reply_to, env_message_id) = l[1]
            # `as` is a reserved word from Python 2.6 on, hence `asnd`
            for af in (env_from or [NIL]):
                for asnd in (env_sender or [NIL]):
                    for art in (env_reply_to or [NIL]):
                        for at in (env_to or [NIL]):
                            for acc in (env_cc or [NIL]):
                                for abcc in (env_bcc or [NIL]):
                                    thunk(num, env_date, env_message_id,
                                          af, asnd, art, at, acc, abcc)
			      


# s-expression parser
import re
# An atom, optionally introduced by a backslash (as in \Seen), followed
# by any run of spaces.
FlagOrAtomPat = re.compile(r'(?P<atom>\\?[^(){\000- %*\"\\]+) *')
# A quoted string followed by any run of spaces.  Inside the quotes a
# backslash escapes the next character, so \" does not end the string.
StringPat = re.compile(r'"(?P<strdata>(?:[^"\\]|\\.)*)" *')
# One backslash escape inside a quoted string; group 1 is the escaped
# character.
_QuotedEscapePat = re.compile(r'\\(.)')


class Symbol:
    """A named atom.  Symbols are created through intern(), which keeps
    one instance per name in `symtab` so they can be compared with `is`."""

    # name -> interned value
    symtab = {}

    def __init__(self, name):
        self._name = name

    def __str__(self):
        return self._name

    def __repr__(self):
        return self._name

# The atom NIL is represented by the empty tuple, so it tests false and
# iterates as an empty sequence.
NIL = ()
Symbol.symtab['NIL'] = NIL

def intern(str):
    """Return the value registered for atom name `str`, creating and
    registering a new Symbol the first time a name is seen."""
    try:
        return Symbol.symtab[str]
    except KeyError:
        sym = Symbol(str)
        Symbol.symtab[str] = sym
        return sym

ENVELOPE = intern('ENVELOPE')
FLAGS = intern('FLAGS')
INTERNALDATE = intern('INTERNALDATE')
RFC822_SIZE = intern('RFC822.SIZE')
UID = intern('UID')
assert intern('ENVELOPE') is ENVELOPE


class ListError(Exception):
    """Raised when the input cannot be parsed as a parenthesized list.

    This used to be a string exception; those can no longer be raised
    on Python 2.6 and later.
    """


def parseList(strs):
    """Parse a parenthesized list spread over the chunks in `strs`.

    strs -- sequence of strings: pieces of list text, with each literal
            given as a '{n}' marker chunk followed by a chunk holding
            the literal's data

    Returns the first top-level item, normally a nested list in which
    atoms are interned Symbols (NIL is the empty tuple), and quoted
    strings and literals are plain strings.

    Raises ListError when the input is empty or malformed.
    """
    top = []
    restOfList(top, strs)
    if not top:
        raise ListError("no list found")
    return top[0]

def restOfList(l, strs):
    """Append the items found in `strs` to `l` until a ')' closes the list.

    Returns (rest, chunks): the unparsed remainder of the chunk that
    held the closing parenthesis, and the chunks after it.  If the input
    ends before a ')' is seen, ('', []) is returned.

    Raises ListError on empty input, on a literal marker without data,
    and on text that is neither an atom nor a quoted string.
    """
    # explicit checks rather than asserts, which vanish under python -O
    if not strs or not strs[0]:
        raise ListError("nothing to parse")

    while len(strs):
        if strs[0][:1] == "{": # literal
            # @@check the length with an assertion
            if len(strs) < 2:
                raise ListError("literal %s has no data" % (strs[0],))
            l.append(strs[1])
            strs = strs[2:]
            continue

        text = strs[0]
        strs = strs[1:]

        while len(text):
            first = text[0]
            if first == ')':
                return text[1:], strs
            elif first == ' ':
                text = text[1:]
            elif first == '(':
                sub = []
                if text[1:]:
                    text, strs = restOfList(sub, [text[1:]] + list(strs))
                else:
                    # '(' ended the chunk: the sublist starts in the next
                    text, strs = restOfList(sub, strs)
                l.append(sub)
            else:
                m = FlagOrAtomPat.match(text)
                if m:
                    l.append(intern(m.group('atom')))
                else:
                    m = StringPat.match(text)
                    if not m:
                        raise ListError(text)
                    # drop the backslashes that protect \" and \\
                    l.append(_QuotedEscapePat.sub(r'\1', m.group('strdata')))
                text = text[m.end():]

    return '', []


class Duplicate(Exception):
    """Raised by mergeFlags when two messages in the destination folder
    carry the same Message-ID; args are (message identifier, Message-ID)."""


class Missing(Exception):
    """Raised by mergeFlags when a flagged source message has no
    counterpart in the destination; args are (destBox, Message-ID)."""


def mergeFlags(mail, srcBox, destBox):
    """Copy message flags from `srcBox` to the same messages in `destBox`.

    mail    -- a logged-in IMAP connection
    srcBox  -- folder whose flags are read
    destBox -- folder whose messages receive those flags

    Messages are matched by the Message-ID field of their envelope.

    Raises Duplicate when a Message-ID occurs twice in destBox, and
    Missing when a flagged source message is not found in destBox.
    """
    progress('getting data from', srcBox)
    mail.select(srcBox)
    (typ, data) = mail.fetch('1:*', 'ALL')

    # flag -> Message-IDs of the source messages carrying it
    flagLists = {}
    for d in data:
        if not d:
            # an empty folder can yield an empty/None placeholder entry
            continue
        pe = parseList(d[1:])
        flags = assoc(pe, FLAGS)
        mid = assoc(pe, ENVELOPE)[9]
        for flag in flags:
            # \Recent is maintained by the server and cannot be set by
            # a client, so trying to store it would only fail
            if str(flag).lower() == '\\recent':
                continue
            #print "@@flags:", flag, mid
            if flag in flagLists:
                flagLists[flag].append(mid)
            else:
                flagLists[flag] = [mid]

    progress('finding messages in ', destBox)
    mail.select(destBox)
    (typ, data) = mail.uid('FETCH', '1:* ENVELOPE')

    # NOTE(review): d[0] of each response is later passed to a plain
    # STORE; confirm whether the patched imaplib2 yields UIDs or message
    # sequence numbers here.
    uidOfMid = {}
    for d in data:
        if not d:
            continue
        uid = d[0]
        mid = assoc(parseList(d[1:]), ENVELOPE)[9]
        if mid in uidOfMid:
            raise Duplicate(uid, mid)
        uidOfMid[mid] = uid

    for flag, mids in flagLists.items():
        progress('flagging %d messages with %s' % (len(mids), flag))
        msgs = []
        for mid in mids:
            if mid not in uidOfMid:
                raise Missing(destBox, mid)
            msgs.append(uidOfMid[mid])
        mail.store(','.join(msgs), '+FLAGS.SILENT', '(%s)' % flag)

def assoc(l, k):
    """Look up `k` in the flat key/value sequence `l`.

    l -- sequence laid out as [key0, value0, key1, value1, ...]
    k -- key to find; keys are compared by identity (`is`)

    Returns the value following the first matching key.
    Raises KeyError(k) when no key matches; a trailing key without a
    value is ignored.
    """
    # walk the key positions by index instead of re-slicing the sequence
    for i in range(0, len(l) - 1, 2):
        if l[i] is k:
            return l[i + 1]
    raise KeyError(k)

def test():
    """Self-test: check the parser on a canned ENVELOPE response, then
    exercise MHPlus against a live server (this part logs in)."""
    raw = ('(ENVELOPE ("Tue, 17 Aug 1999 12:59:35 -0500" ', '{44}', "LP doesn't grok \\top, \\bot, \\langle, \\rangle", ' (("Dan Connolly" NIL "connolly" "w3.org")) (("Dan Connolly" NIL "connolly" "w3.org")) (("Dan Connolly" NIL "connolly" "w3.org")) NIL NIL NIL NIL "<37B9A307.81A7E673@w3.org>"))')
    dan = ['Dan Connolly', NIL, 'connolly', 'w3.org']
    expected = [ENVELOPE,
                ['Tue, 17 Aug 1999 12:59:35 -0500',
                 "LP doesn't grok \\top, \\bot, \\langle, \\rangle",
                 [list(dan)], [list(dan)], [list(dan)],
                 NIL, NIL, NIL, NIL,
                 '<37B9A307.81A7E673@w3.org>']]
    parsed = parseList(raw)
    assert(parsed == expected)

    mailer = MHPlus()
    mailer.folder('nsmail/000inbox')
    mailer.pick('FROM "connolly"')
    mailer.scan()

# Script entry point.  The self-test stays commented out because test()
# also constructs MHPlus(), which opens a live IMAP session.
if __name__ == '__main__':
    #test()
    main(sys.argv)
