DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Nic has posted 1 post at DZone. View Full User Profile

Process Email Files Like Unix Find

03.18.2008
| 3511 views |
  • submit to reddit
        I call this program whitelist. It lets you run a command on a bunch of files depending on whether the file is an email and has a from address in a whitelist.

It's useful for maintaining whitelisted mailboxes and analysing mailboxes. With a few more tests it might be a generically useful tool.

#!/usr/bin/python
# Copyright (C) 2008 by Tapsell-Ferrier Limited

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc.,   51 Franklin Street, Fifth Floor,
# Boston, MA  02110-1301  USA

import commands
import email.Parser
import sys
import re
import getopt
import os
import os.path

try:
    from email.utils import parseaddr
except:
    from rfc822 import parseaddr


def help():
    """Print the command-line usage text for whitelist.py to stdout.

    NOTE: the name shadows the ``help`` builtin inside this module; it is
    kept unchanged because existing callers use it.
    """
    # Raw string: the shell examples contain backslashes that must be printed
    # literally rather than being treated as (invalid) escape sequences.
    print(r"""whitelist.py -h
whitelist.py [-v] [-f whitelist filename] command ; filelist [-]

Execute the specified command (which must be shell escaped if calling
from shell) on all the files in the filelist or, if - is present in
the filelist, read from stdin (like xargs) whenever the file is an
email that contains a from address specified in the whitelist.

Like xargs, or find, the command can include {} as a replacement token
for the matched filename.

The command can also be a header reference, for example:

  $FROM

will print the specified mail's From address.

Options:

 -v   specifies that the test is to be negated, executing the action if
      the file does NOT contain a from address in the whitelist.

 -f   specifies a whitelist, the default is $HOME/.addresses

For example:

 whitelist.py -f .wlist wc \{} \; maildir/cur/*

runs wc on each file in maildir/cur with a FROM address matching
something in the whitelist; or:

 find maildir/INBOX/cur -type f | whitelist.py -v mv \{} mailbox/TRASH/cur \; -

mv's all files in the INBOX with FROMs not matching the whitelist into
a TRASH folder.

  find maildir/Greylist/new -type f | whitelist.py -v \$TO \; -

displays the TO address of all messages where the from didn't match
the whitelist.
""")


def read_whitelisted(filename):
    """Return the list of whitelisted addresses stored in ``filename``.

    The whole file is read and split on any run of whitespace, so the
    addresses may be separated by spaces, tabs or newlines.  An empty file
    yields an empty list.  Errors from opening or reading the file
    propagate to the caller.
    """
    fd = open(filename)
    try:
        # try/finally so the file is closed even when read() raises.
        return fd.read().split()
    finally:
        fd.close()

def get_msg(filename):
    """Parse the headers of the file ``filename`` and return the message.

    Only the header section is parsed (the second argument to ``parse`` is
    the headers-only flag).  The returned message object supports
    case-insensitive header lookup, e.g. ``msg["from"]``, which gives
    ``None`` for a header that is not present.
    """
    # The parser lives in email.parser on newer Pythons; email.Parser is the
    # legacy spelling.  Try the new name first and fall back to the old one.
    try:
        from email.parser import HeaderParser
    except ImportError:
        from email.Parser import HeaderParser

    fd = open(filename)
    try:
        return HeaderParser().parse(fd, True)
    finally:
        fd.close()

# Matches the literal "{}" filename placeholder in a command string.  Written
# as a raw string so the backslash reaches the regex engine directly instead
# of relying on Python passing through the unrecognised "\{" escape.
action_re = re.compile(r"\{}")

def handle(filenames_fn, action, whitelist, negate=False):
    """Run ``action`` for every file whose From address passes the whitelist.

    filenames_fn -- callable returning an iterable of file names.
    action       -- either a header reference of the form ``$NAME``, in
                    which case that header's value is printed, or a shell
                    command in which every ``{}`` is replaced by the file
                    name before it is passed to ``os.system``.
    whitelist    -- container of addresses; membership is tested with ``in``.
    negate       -- when true, act on the files whose From address is NOT
                    in the whitelist.
    """
    # The action does not change between files, so decide once whether it is
    # a "$HEADER" reference or a shell command.
    header_ref = re.match(r"\$(.+)", action)
    if header_ref:
        header_name = header_ref.group(1)
    else:
        header_name = None

    for filename in filenames_fn():
        msg = get_msg(filename)
        # A file without a From header gives None here; parse an empty
        # string instead so such files count as "not whitelisted" rather
        # than crashing inside parseaddr.
        addr = parseaddr(msg["from"] or "")[1]

        selected = addr in whitelist
        if negate:
            selected = not selected
        if not selected:
            continue

        if header_name is not None:
            # A missing header prints as "None".
            print(msg[header_name])
        else:
            # Plain string replacement: used as a regex replacement template
            # a file name containing backslashes would be misinterpreted.
            # NOTE(review): the file name is interpolated into a shell
            # command unquoted, so names with spaces or shell
            # metacharacters are interpreted by the shell.
            os.system(action.replace("{}", filename))


def _split_command(args):
    """Split ``args`` into ``(command, files)`` at the ";" separator.

    Returns None when there is no separator or no command before it.
    """
    if ";" in args:
        # Split at the last stand-alone ";" so that file names containing
        # spaces survive as single entries.
        sep = len(args) - 1 - args[::-1].index(";")
        if sep == 0:
            return None
        return " ".join(args[:sep]), [f for f in args[sep + 1:] if f]

    # Fallback: the separator is embedded in a larger argument, e.g. the
    # whole "command ; files" text was passed as one quoted string.
    m = re.match("(.*) ;([ ]*.*)", " ".join(args))
    if not m:
        return None
    return m.group(1), m.group(2).split()


def main(args):
    """Parse the command line ``args`` and process the listed files.

    ``args`` is the argument list without the program name.  Options:
    -h prints the usage text and exits 0, -v negates the whitelist test,
    -f FILE names the whitelist (default: ``.addresses`` in the home
    directory).  The remaining arguments are ``command ; filelist``; a
    ``-`` entry in the file list reads further file names from stdin, one
    per line.

    Exits with status 1 when the whitelist file does not exist or the
    ";" separator is missing, and with status 2 on an invalid option.
    """
    negate = False
    whitelist_filename = None
    try:
        # "f:" is required for the documented -f option to be accepted.
        opts, args = getopt.getopt(args, "hvf:")
    except getopt.GetoptError:
        # sys.exc_info() keeps this valid on old and new Python versions.
        sys.stderr.write("whitelist.py   -  %s (see -h)\n" % sys.exc_info()[1])
        sys.exit(2)

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)

        elif o == "-v":
            negate = True

        elif o == "-f":
            whitelist_filename = a

    if whitelist_filename is None:
        # Only consult the home directory when -f was not given.
        whitelist_filename = os.path.join(os.path.expanduser("~"), ".addresses")

    if not os.access(whitelist_filename, os.F_OK):
        sys.stderr.write("whitelist.py   -  no whitelist filename\n\n")
        help()
        sys.exit(1)

    parts = _split_command(args)
    if parts is None:
        sys.stderr.write(
            "whitelist.py   -  no 'command ;' before the file list (see -h)\n")
        sys.exit(1)
    cmd, files = parts

    def ffn():
        for f in files:
            if f == "-":
                for line in sys.stdin:
                    name = line.strip()
                    # Blank lines are not file names; skip them.
                    if name:
                        yield name
            else:
                yield f

    whitelist = read_whitelisted(whitelist_filename)
    handle(ffn, cmd, whitelist, negate)


# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    main(sys.argv[1:])

# End
    

Comments

Snippets Manager replied on Sun, 2009/01/25 - 11:07pm

The whitelist command is brilliant code. I use it all the time for maintaining whitelisted mailboxes and also for analyzing mailboxes. ---------- supreme buy - keyword elite - seo elite - water4gas - fat loss 4 idiots - rocket spanish