unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH] Add post-add and post-tag hooks
@ 2009-12-22  2:56 Tomas Carnecky
  2009-12-22  3:18 ` Tomas Carnecky
  2009-12-22 23:02 ` Olly Betts
  0 siblings, 2 replies; 5+ messages in thread
From: Tomas Carnecky @ 2009-12-22  2:56 UTC (permalink / raw)
  To: notmuch

The post-add hook is run by 'notmuch new' after each new message is added,
post-tag is run after a tag has been added or removed. The hooks are stored
in the users home directory (~/.notmuch/hooks/).

Since post-tag is run unconditionally every time a new tag is added or removed,
that means it is also invoked when 'notmuch new' adds the two implicit
tags (inbox, unread). So make sure your scripts don't choke on that and can
be both executed in parallel.

Signed-off-by: Tomas Carnecky <tom@dbservice.com>
---
 lib/message.cc |   45 ++++++++++++++++++++++++++++++++++++++
 notmuch-new.c  |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 0 deletions(-)

diff --git a/lib/message.cc b/lib/message.cc
index 49519f1..bcd8abb 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -664,6 +664,47 @@ _notmuch_message_remove_term (notmuch_message_t *message,
     return NOTMUCH_PRIVATE_STATUS_SUCCESS;
 }
 
+/* Run the post-tag hook */
+static void
+post_tag_hook (notmuch_message_t *message, const char *tag, int added)
+{
+    /* Skip tags that notmuch itself assigns to new messages */
+    const char *skip[] = {
+        "inbox", "unread"
+    };
+
+    for (int i = 0; i < sizeof (skip) / sizeof (skip[0]); ++i) {
+        if (strcmp(skip[i], tag) == 0)
+            return;
+    }
+
+    char proc[PATH_MAX];
+    snprintf (proc, PATH_MAX, "%s/.notmuch/hooks/post-tag", getenv("HOME"));
+    if (access (proc, X_OK))
+        return;
+
+    int pid = fork ();
+    if (pid == -1)
+        return;
+
+    /* Wait for the hook to finish. This behaviour might be changed in the
+     * future, but for now I think it's better to take the safe route. */
+    if (pid > 0) {
+        waitpid (0, NULL, 0);
+        return;
+    }
+
+    const char *filename = notmuch_message_get_filename (message);
+    const char *message_id = notmuch_message_get_message_id (message);
+
+    const char *args[] = {
+        proc, message_id, filename, tag, added ? "added" : "removed", NULL
+    };
+
+    execv (proc, (char *const *) &args);
+    exit (0);
+}
+
 notmuch_status_t
 notmuch_message_add_tag (notmuch_message_t *message, const char *tag)
 {
@@ -684,6 +725,8 @@ notmuch_message_add_tag (notmuch_message_t *message, const char *tag)
     if (! message->frozen)
 	_notmuch_message_sync (message);
 
+    post_tag_hook (message, tag, 1);
+
     return NOTMUCH_STATUS_SUCCESS;
 }
 
@@ -707,6 +750,8 @@ notmuch_message_remove_tag (notmuch_message_t *message, const char *tag)
     if (! message->frozen)
 	_notmuch_message_sync (message);
 
+    post_tag_hook (message, tag, 0);
+
     return NOTMUCH_STATUS_SUCCESS;
 }
 
diff --git a/notmuch-new.c b/notmuch-new.c
index 837ae4f..d984aae 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -42,6 +42,71 @@ handle_sigint (unused (int sig))
     interrupted = 1;
 }
 
+/* Run the post-add hook. The hook is given the chance to specify additional tags
+ * that should be added to the message. The hook writes the tags to its stdout,
+ * separated by a newline. The script's stdout is redirected to a pipe so that
+ * notmuch can process its output. The tags can be prefixed with '+' or '-' to
+ * indicate if the tag should be added or removed. Absence of one of these prefixes
+ * means that the tag will be added. */
+static void
+post_add_hook (notmuch_message_t *message)
+{
+    char proc[PATH_MAX];
+    snprintf (proc, PATH_MAX, "%s/.notmuch/hooks/post-add", getenv ("HOME"));
+    if (access (proc, X_OK))
+        return;
+
+    /* The pipe between the hook and the notmuch process. The script writes
+     * into fds[0], notmuch reads from fds[1]. */
+    int fds[2];
+    if (pipe (fds))
+	return;
+
+    int pid = fork ();
+    if (pid == -1) {
+	close (fds[0]);
+	close (fds[1]);
+	return;
+    } else if (pid > 0) {
+	close (fds[0]);
+	waitpid (0, NULL, 0);
+
+	char buffer[256] = { 0, };
+	read (fds[1], buffer, sizeof (buffer));
+
+	char *tag;
+	for (tag = buffer; tag && *tag; ) {
+	    char *end = strchr (tag, '\n');
+	    if (end)
+		*end = 0;
+
+	    if (tag[0] == '+')
+		notmuch_message_add_tag (message, tag + 1);
+	    else if (tag[0] == '-')
+		notmuch_message_remove_tag (message, tag + 1);
+	    else
+		notmuch_message_add_tag (message, tag);
+
+	    tag = end ? end + 1 : end;
+	}
+
+	return;
+    }
+
+    /* This is the child process (where the hook runs) */
+    close (fds[1]);
+    dup2 (fds[0], 1);
+
+    const char *filename = notmuch_message_get_filename (message);
+    const char *message_id = notmuch_message_get_message_id (message);
+    const char *args[] = {
+	proc, message_id, filename, NULL
+    };
+
+    execv (proc, (char *const *) &args);
+    exit (0);
+}
+
 static void
 tag_inbox_and_unread (notmuch_message_t *message)
 {
@@ -253,6 +318,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 		    case NOTMUCH_STATUS_SUCCESS:
 			state->added_messages++;
 			tag_inbox_and_unread (message);
+			post_add_hook (message);
 			break;
 		    /* Non-fatal issues (go on to next file) */
 		    case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
-- 
1.6.6.rc3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add post-add and post-tag hooks
  2009-12-22  2:56 [PATCH] Add post-add and post-tag hooks Tomas Carnecky
@ 2009-12-22  3:18 ` Tomas Carnecky
  2009-12-22 23:02 ` Olly Betts
  1 sibling, 0 replies; 5+ messages in thread
From: Tomas Carnecky @ 2009-12-22  3:18 UTC (permalink / raw)
  To: notmuch

On 12/22/09 3:56 AM, Tomas Carnecky wrote:
 > The post-add hook is run by 'notmuch new' after each new message is 
added,
 > post-tag is run after a tag has been added or removed. The hooks are 
stored
 > in the users home directory (~/.notmuch/hooks/).
 >
 > Since post-tag is run unconditionally every time a new tag is added 
or removed,
 > that means it is also invoked when 'notmuch new' adds the two implicit
 > tags (inbox, unread). So make sure your scripts don't choke on that 
and can
 > be both executed in parallel.

What are these good for? I (try to) use these two hooks to automatically 
tag messages. But not in the usual way, I don't use static scripts, I 
use a spam filter. I hope to be able to teach it to classify the 
messages, not only spam/ham but also add tags such as patch (does that 
message contain a patch?), tag messages based on which mailing lists the 
messages belong etc.

I use dspam as the spam filter. Each tag is actually a virtual user that 
exists in dspam. When adding new messages dspam classifies the mails and 
I assign the tags based on the result. If dspam deemed the message Spam 
then I set the tag. To train dspam I use the post-tag hook: whenever I 
change a tag (for example add 'spam' to a falsely unrecognized spam), 
the post-tag hook retrains dspam.

Since the post-add hook is running synchronously with 'notmuch new', 
this adds quite a bit overhead. Depending on how fast the spam filter 
is, it adds more or less time to do the import of new messages. It also 
depends on how many tags you want to assign - dspam has to run once for 
each tag to see if the tag should be assigned or not.

tom

--- >8 --- post-add
#!/bin/bash

# This is so that the post-tag doesn't trigger retraining!
export NOTMUCH_POST_ADD=1

MESSAGEID=$1
FILENAME=$2

# Array of tags.
tags=( spam )
for tag in "${tags[@]}"; do
         RESULT="$(/opt/dspam/bin/dspam --user $tag --deliver=summary < 
$FILENAME)"

         if echo $RESULT | grep -q 'result="Spam";'; then
                 echo $tag
         fi
done

# I remove the inbox flag from all new messages and keep only 'unread'
echo "-inbox"
--- >8 ---

--- >8 --- post-tag
#!/bin/sh

if [ "$NOTMUCH_POST_ADD" ]; then
         echo "Exiting due to running in post-add"
         exit
fi

MESSAGEID=$1
FILENAME=$2
TAG=$3
ADDREMOVE=$4

if [ "x$ADDREMOVE" = "xadded" ]; then
         CLASS="spam"
else
         CLASS="innocent"
fi

/opt/dspam/bin/dspam --user $TAG --source=error --class=$CLASS < $FILENAME
--- >8 ---

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add post-add and post-tag hooks
  2009-12-22  2:56 [PATCH] Add post-add and post-tag hooks Tomas Carnecky
  2009-12-22  3:18 ` Tomas Carnecky
@ 2009-12-22 23:02 ` Olly Betts
  2009-12-23  6:57   ` Tomas Carnecky
  1 sibling, 1 reply; 5+ messages in thread
From: Olly Betts @ 2009-12-22 23:02 UTC (permalink / raw)
  To: notmuch

Tomas Carnecky writes:
> #if defined(__sun__)
> 	... sprintf, stat etc
> #else
> 	(void) path;
> 	return dirent->d_type == DT_DIR;
> #endif

Rather than a platform-specific check, it would be better to check if DT_DIR
is defined.

Beware that even on Linux (where the d_type field is present), it may always
contain DT_UNKNOWN for some filesystems, so you really should check for that
case and fall back to using stat() instead.

Cheers,
    Olly

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add post-add and post-tag hooks
  2009-12-22 23:02 ` Olly Betts
@ 2009-12-23  6:57   ` Tomas Carnecky
  2009-12-23  8:29     ` Olly Betts
  0 siblings, 1 reply; 5+ messages in thread
From: Tomas Carnecky @ 2009-12-23  6:57 UTC (permalink / raw)
  To: Olly Betts; +Cc: notmuch

On 12/23/09 12:02 AM, Olly Betts wrote:
> Tomas Carnecky writes:
>> #if defined(__sun__)
>> 	... sprintf, stat etc
>> #else
>> 	(void) path;
>> 	return dirent->d_type == DT_DIR;
>> #endif
>
> Rather than a platform-specific check, it would be better to check if DT_DIR
> is defined.
>
> Beware that even on Linux (where the d_type field is present), it may always
> contain DT_UNKNOWN for some filesystems, so you really should check for that
> case and fall back to using stat() instead.

Currently configure is a simple shell script and not some autoconf 
magic. And I don't know how eager Carl is to use autoconf, scons, cmake 
or similar.

tom

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add post-add and post-tag hooks
  2009-12-23  6:57   ` Tomas Carnecky
@ 2009-12-23  8:29     ` Olly Betts
  0 siblings, 0 replies; 5+ messages in thread
From: Olly Betts @ 2009-12-23  8:29 UTC (permalink / raw)
  To: Tomas Carnecky; +Cc: notmuch

[Sorry, I seemed to manage to attach my reply to the wrong thread...]

On Wed, Dec 23, 2009 at 07:57:21AM +0100, Tomas Carnecky wrote:
> On 12/23/09 12:02 AM, Olly Betts wrote:
>> Rather than a platform-specific check, it would be better to check if DT_DIR
>> is defined.
>>
>> Beware that even on Linux (where the d_type field is present), it may always
>> contain DT_UNKNOWN for some filesystems, so you really should check for that
>> case and fall back to using stat() instead.
>
> Currently configure is a simple shell script and not some autoconf  
> magic. And I don't know how eager Carl is to use autoconf, scons, cmake  
> or similar.

No autoconf magic required (or desirable here that I can see) - here's what
I'm suggesting (untested as written, but Xapian's omega indexer uses an
approach much like this):

#ifdef DT_UNKNOWN
    /* If d_type is available and supported by the FS, avoid a call to stat. */
    if (entries[i]->d_type == DT_UNKNOWN) {
	/* Fall back to calling stat. */
#endif
    {
	char pbuf[PATH_MAX];
        snprintf(pbuf, PATH_MAX, "%s/%s", path, entries[i]->d_name);

	struct stat buf;
	if (stat(pbuf, &buf) == -1 || !S_ISDIR(buf.st_mode))
	    continue;
    }
#ifdef DT_UNKNOWN
    } else if (entries[i]->d_type != DT_DIR) continue;
#endif


Cheers,
    Olly

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-12-23  8:29 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-22  2:56 [PATCH] Add post-add and post-tag hooks Tomas Carnecky
2009-12-22  3:18 ` Tomas Carnecky
2009-12-22 23:02 ` Olly Betts
2009-12-23  6:57   ` Tomas Carnecky
2009-12-23  8:29     ` Olly Betts

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).