unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* My mail configuration
@ 2011-02-23 14:22 Ben Gamari
  2011-03-06 21:34 ` Michal Sojka
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Ben Gamari @ 2011-02-23 14:22 UTC (permalink / raw)
  To: notmuch

Here is my mail sorting script that has been slowly evolving for almost
a year now. It uses the Python bindings, along with Bogofilter for spam
filtering. There is also an update-junk script which brings the
Bogofilter database into synchronization with the notmuch tags. On this
note, if someone wants to implement the ability to hide certain tags
(say, those matching /\..+/) in the emacs interface it would be greatly
appreciated. I have notmuch configured such that all new mail starts
with just the "new" tag. The sorting script then takes it from
there. Hope this will give folks some ideas.

Cheers,

- Ben


===File ~/.env/mail/sort_mail.py============================
#!/usr/bin/python

# Warning:
# Be careful about using Query.count_messages(); it's technically an estimate
# and is not guaranteed to be correct.

import os
import logging
import time

# Log everything from DEBUG upwards for this run.
logging.basicConfig(level=logging.DEBUG)

# Ordered list of (search filter, tags) rules. The registration helpers below
# append to it and the sorting pass iterates over it.
_tags = []
# Wall-clock start of the run; used for the elapsed-time summary logged at the
# end of the script.
start_time = time.time()

def sf_list(name, tag):
        """Register a tagging rule for a SourceForge-style mailing list.

        Appends one ``(filter, tags)`` pair to the module-level ``_tags``
        list: the filter is ``to:<name>`` and the tags are ``['list', tag]``.
        Only the bare ``name`` is matched; the stricter variants that appended
        ``@lists.sourceforge.net`` or ``@lists.sf.net`` are not in use.
        """
        search_filter = 'to:%s' % name
        labels = ['list', tag]
        _tags.append((search_filter, labels))

def kernel_list(name, tag):
        """Register a tagging rule for a vger.kernel.org-style mailing list.

        The rule stored in the module-level ``_tags`` list pairs the filter
        ``to:<name>`` with the tags ``['list', tag]``. The ``@vger.kernel.org``
        suffix is deliberately not part of the filter (that variant is
        disabled), so only the bare ``name`` is matched.
        """
        rule = ('to:%s' % name, ['list', tag])
        _tags.append(rule)

def fdo_list(name, tag):
        """Register a tagging rule for a freedesktop.org-style mailing list.

        Adds ``('to:<name>', ['list', tag])`` to the module-level ``_tags``
        list. The filter uses the bare ``name`` only; the variant that
        appended ``@lists.freedesktop.org`` is disabled.
        """
        to_filter = 'to:%s' % name
        _tags.append((to_filter, ['list', tag]))

def _list(name, tag):
        """Register a tagging rule for a generic mailing list.

        ``name`` is whatever should follow ``to:`` in the search filter (a
        bare list name or a full or partial address); ``tag`` is the per-list
        tag applied together with ``'list'``. The resulting
        ``(filter, tags)`` pair is appended to the module-level ``_tags``.
        """
        list_tags = ['list', tag]
        entry = ('to:%s' % name, list_tags)
        _tags.append(entry)

def tag(filter, *tags):
        """Register an arbitrary tagging rule.

        ``filter`` is the search expression and every further positional
        argument is a tag. They are stored as the pair ``(filter, tags)`` in
        the module-level ``_tags`` list, with ``tags`` kept as the tuple of
        extra arguments.
        """
        rule = (filter, tags)
        _tags.append(rule)

# Mailing-list rules. Each call below appends one (filter, tags) entry to
# _tags. The list helpers all build a 'to:<name>' filter and attach the 'list'
# tag plus the per-list tag given as the second argument. Entries are kept in
# the order they are registered here.
kernel_list('linux-kernel', 'lkml')
kernel_list('mm-commits', 'mm-commits')
kernel_list('linux-omap', 'linux-omap')
kernel_list('linux-next', 'linux-next')
kernel_list('linux-wireless', 'linux-wireless')
kernel_list('linux-btrfs', 'btrfs')
_list('linux-pm', 'linux-pm')
_list('linux-arm-kernel', 'linux-arm')
sf_list('oprofile-list', 'oprofile')
sf_list('spi-devel-general', 'spi-devel')
sf_list('linux1394-devel', 'ieee1394')

# Names may be a bare list name, a full address, or an address prefix ending
# in '@'; whatever is given is placed verbatim after 'to:'.
sf_list('ipw3945-devel', 'ipw')
_list('hostap@lists.shmoo.com', 'hostap')
_list('ath9k-devel@', 'ath9k')
# Two different filters feed the same 'vim' tag.
_list('vim-dev@vim.org', 'vim')
_list('vim_dev', 'vim')

fdo_list('intel-gfx', 'intel-gfx')
fdo_list('xorg', 'xorg')
fdo_list('hal', 'hal')
fdo_list('compiz', 'compiz')
# Several lists can share one tag ('dri', 'mesa').
sf_list('dri-devel', 'dri')
sf_list('dri-users', 'dri')
sf_list('mesa3d-dev', 'mesa')
fdo_list('mesa-dev', 'mesa')

fdo_list('devkit-devel', 'devkit')
sf_list('matplotlib-users', 'matplotlib')
sf_list('matplotlib-devel', 'matplotlib')
_list('notmuch@notmuchmail.org', 'notmuch')
_list('eigen@lists.tuxfamily.org', 'eigen')
_list('launchpad-users@lists.launchpad.net', 'launchpad')
_list('boost@lists.boost.org', 'boost')
_list('debian-python@lists.debian.org', 'debian-python')

_list('geda-user@', 'geda')

_list('openembedded-devel@lists.openembedded.org', 'openembedded')
_list('beagleboard@googlegroups.com', 'beagleboard')
_list('angstrom-distro-devel@linuxtogo.org', 'angstrom')
_list('angstrom-distro-users@linuxtogo.org', 'angstrom')

_list('mono-devel-list@lists.ximian.com', 'mono')
_list('mono-list@', 'mono')
_list('ubuntu-devel-discuss@lists.ubuntu.com', 'ubuntu-devel')
_list('git@vger.kernel.org', 'git')
_list('sup-talk@rubyforge.org', 'sup')
_list('thrust-users@googlegroups.com', 'thrust')
_list('golang-nuts@googlegroups.com', 'go')
_list('numpy-discussion@scipy.org', 'numpy')
_list('scipy-user@scipy.org', 'scipy')

_list('rsync@lists.samba.org', 'rsync')
# Sender-based rule registered through tag(): the 'list' tag is passed
# explicitly here because tag() does not add it on its own.
tag('from:samba-bugs', 'bugs', 'rsync', 'list')

_list('containers@', 'containers')

# Any sender matching 'bugzilla' is filed as list traffic tagged 'bugs'.
tag('from:bugzilla', 'bugs', 'list')

# Tags that aren't for lists.
# Each call registers one (filter, tags) rule in _tags via tag(); none of them
# adds the 'list' tag.
tag('from:Facebook', 'facebook')
tag('to:gdh@gdhour.com', 'gdh')

tag('to:bgamari@gmail.com', 'gmail')
tag('to:bgamari.foss@gmail.com', 'foss')
# Both rules below mark mail as 'sent'.
# The display name contains a space, so it has to be quoted to remain a single
# from: phrase. Unquoted, only "Ben" is bound to the from: prefix and "Gamari"
# turns into a separate, unprefixed search term.
tag('from:"Ben Gamari"', 'sent')
tag('from:bgamari.foss', 'sent')

from sort_junk import sort_junk
from notmuch_utils import *
import notmuch
# Main sorting pass: every message carrying the 'new' tag is classified and
# ends up without 'new'. tag_search() comes from notmuch_utils (not shown
# here); presumably it applies the given +tag/-tag changes to every message
# matching the search string.
db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)

# Freeze new messages
# n_msgs doubles as the message count reported in the final log line.
# NOTE(review): freeze() is called on the message objects of this particular
# search; the matching thaw() at the bottom runs over a fresh
# search_messages() call, after 'new' has been removed everywhere. Confirm
# that the two actually pair up as intended.
q_new = notmuch.Query(db, 'tag:new')
n_msgs = 0
for msg in q_new.search_messages():
        msg.freeze()
        n_msgs += 1

# Take care of basics
# Every new message starts out both unread and unseen.
tag_search(db, 'tag:new', '+unread', '+unseen')

# Take care of feeds
# Feed items are tagged and drop out of the rest of the pipeline ('-new').
tag_search(db, 'folder:feeds', '+feeds', '-new')

# Run through Bogofilter
# sort_junk() is handed the 'tag:new' query and runs its own search on it.
sort_junk(q_new)

# Tag things
# Apply each registered rule, restricted to messages that are still new.
# NOTE(review): rule tags are passed as bare names ('list', 'lkml', ...)
# whereas the literal calls in this script use '+tag'/'-tag' -- confirm that
# tag_search() treats a bare name as an addition.
for filter, tags in _tags:
        tag_search(db, '%s and tag:new' % filter, *tags)

# Ignore things I sent
# Own messages skip the inbox but put their thread on the watch list.
tag_search(db, 'tag:new and tag:sent', '-unseen', '-new', '-unread', '+watch')

# Update watch tag
# A new message inherits 'watch' when its thread has a message tagged 'watch'.
# NOTE(review): relies on len() being supported by the search result; the
# warning at the top of this script is presumably why count_messages() is
# avoided here.
for msg in q_new.search_messages():
        q = notmuch.Query(db, 'tag:watch and thread:%s' % msg.get_thread_id())
        if len(q.search_messages()) > 0:
                logging.debug('watching %s' % msg.get_message_id())
                msg.add_tag('watch')

# Watched items should go to inbox
tag_search(db, 'tag:new and tag:watch', '+inbox', '-new')

# Ignore threads that I've already seen
# A new list message whose thread contains a message without 'unseen' is
# itself marked as seen and finished ('-new').
q = notmuch.Query(db, 'tag:new and tag:list')
for msg in q.search_messages():
        q2 = notmuch.Query(db, 'thread:%s and not tag:unseen' % msg.get_thread_id())
        if len(q2.search_messages()) > 0:
                msg.remove_tag('unseen')
                msg.remove_tag('new')

# Remove new from sorted list items
# List mail never reaches the inbox step below.
tag_search(db, 'tag:new and tag:list', '-new')

# Tag remaining new items for inbox
# Whatever is still 'new' at this point matched no earlier step.
tag_search(db, 'tag:new', '+inbox', '-new')

# Thaw new messages
# Re-runs the 'tag:new' search (see the freeze note above).
for msg in q_new.search_messages():
        msg.thaw()

# Report how many messages were counted at the start and the total run time.
end_time = time.time()
logging.info('Sorted %d messages in %1.2f seconds' % (n_msgs, end_time - start_time))

============================================================


===File ~/.env/mail/sort_junk.py============================
#!/usr/bin/python

import logging
import subprocess
from subprocess import PIPE
import notmuch
import re

def sort_junk(query):
        """Tag as 'junk' every message in ``query`` that Bogofilter calls spam.

        One ``bogofilter -bv`` child process is started for the whole run. For
        each message its filename is written to the child's stdin and a single
        verdict line is read back from its stdout; when that line matches
        ``X-Bogosity: Spam`` the message gets the ``junk`` tag.

        query -- object whose ``search_messages()`` yields messages providing
                 ``get_filename()``, ``get_message_id()`` and ``add_tag()``.

        Returns None. Exceptions from the pipe or from the message objects
        propagate to the caller after the child process has been cleaned up.
        """
        spam_re = re.compile(r'X-Bogosity:\s*Spam')
        bf = subprocess.Popen(['bogofilter', '-bv'], stdin=PIPE, stdout=PIPE)
        try:
                for msg in query.search_messages():
                        bf.stdin.write(msg.get_filename() + '\n')
                        # The verdict is read synchronously, so the filename
                        # must actually reach the child before we block.
                        bf.stdin.flush()
                        verdict = bf.stdout.readline()
                        if spam_re.search(verdict):
                                logging.debug('Message %s marked as junk' % msg.get_message_id())
                                msg.add_tag('junk')
        finally:
                # Close the pipes and reap the child even when the loop fails,
                # so no bogofilter process is left behind.
                bf.stdin.close()
                bf.wait()
                bf.stdout.close()

if __name__ == '__main__':
        # Stand-alone use: all command-line arguments, joined with spaces, form
        # the notmuch search whose matches are run through sort_junk().
        import sys
        search_terms = ' '.join(sys.argv[1:])
        db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
        sort_junk(notmuch.Query(db, search_terms))

============================================================


===File ~/.env/mail/update-junk=============================
#!/usr/bin/python

import notmuch
from notmuch_utils import *
import subprocess
from time import time
import sys

# This script never imports logging itself; import it explicitly instead of
# depending on the name leaking in through the notmuch_utils star import.
import logging

logging.basicConfig(level=logging.INFO)

db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
if '--clean' in sys.argv:
        # Start from scratch: drop ~/.bogofilter and remove the bookkeeping
        # tags, so that the update steps below match every message again.
        import shutil
        import os.path
        bogofilter_dir = os.path.expanduser('~/.bogofilter')
        # rmtree() raises when the directory is missing (e.g. --clean on a
        # first run), which would abort before the tags are reset.
        if os.path.isdir(bogofilter_dir):
                shutil.rmtree(bogofilter_dir)
        tag_search(db, 'tag:.bf_spam', '-.bf_spam')
        tag_search(db, 'tag:.bf_ham', '-.bf_ham')

def do_update(search, tag_func, bf_args):
        """Pipe the files of all messages matching ``search`` into Bogofilter.

        A ``bogofilter <bf_args>`` child process is started and the filename
        of every matching message is written to its stdin, one per line.
        ``tag_func(msg)`` is called for each message right after its filename
        has been written.

        search   -- notmuch search string selecting the messages to process.
        tag_func -- callable taking a message; used by the callers to add or
                    remove the bookkeeping tag.
        bf_args  -- single command-line argument string passed to bogofilter.

        Returns a ``(count, seconds)`` tuple: the number of messages processed
        and the elapsed wall-clock time, including waiting for bogofilter to
        exit. Uses the module-level ``db``.
        """
        start_time = time()
        p = subprocess.Popen(['bogofilter', bf_args], stdin=subprocess.PIPE)
        n = 0
        try:
                q = notmuch.Query(db, search)
                for msg in q.search_messages():
                        p.stdin.write('%s\n' % msg.get_filename())
                        tag_func(msg)
                        n += 1
        finally:
                # Close the pipe and reap the child even if the query or
                # tag_func fails, so no bogofilter process is left waiting on
                # stdin.
                p.stdin.close()
                p.wait()
        return (n, time()-start_time)

# The four synchronisation steps, run in this order. Each entry is
# (start message, search, per-message tag change, bogofilter argument,
# summary format).
_update_steps = (
        ('Registering spam',
         'tag:junk and not tag:.bf_spam',
         lambda msg: msg.add_tag('.bf_spam'),
         '-sb',
         'Registered %d spam in %1.2f seconds'),
        ('Unregistering spam',
         'not tag:junk and tag:.bf_spam',
         lambda msg: msg.remove_tag('.bf_spam'),
         '-Sb',
         'Unregistered %d spam in %1.2f seconds'),
        # Only consider messages that have been read as ham
        ('Registering ham',
         'not tag:junk and not tag:unread and not tag:.bf_ham',
         lambda msg: msg.add_tag('.bf_ham'),
         '-nb',
         'Registered %d ham in %1.2f seconds'),
        ('Unregistering ham',
         'tag:junk and tag:.bf_ham',
         lambda msg: msg.remove_tag('.bf_ham'),
         '-Nb',
         'Unregistered %d ham in %1.2f seconds'),
)

for announce, search_terms, retag, bf_flags, summary in _update_steps:
        logging.info(announce)
        n, t = do_update(search_terms, retag, bf_flags)
        logging.info(summary % (n, t))

============================================================

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-05-20 22:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-23 14:22 My mail configuration Ben Gamari
2011-03-06 21:34 ` Michal Sojka
2011-03-11 14:55   ` Ben Gamari
2011-03-18  7:31 ` Jesse Rosenthal
2011-03-18 12:32   ` Ben Gamari
2011-05-20 22:39 ` eric casteleijn

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).