unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH] Added mail directory filename pattern support.
@ 2010-02-22 20:07 Bart Massey
  2010-02-23  6:15 ` Stewart Smith
  0 siblings, 1 reply; 3+ messages in thread
From: Bart Massey @ 2010-02-22 20:07 UTC (permalink / raw)
  To: Notmuch Mail

Typically, the filenames in a mail directory that actually
contain mail obey some specific format.  For example, in my
MH email directory, all mail filenames consist only of
digits.

This patch adds support for a config file variable
"filename_pattern" which may be set to a regex used to filter
only valid mail filenames when scanning.  Effective use of
filename_pattern cuts down on the noise from notmuch, and
may speed it up in some cases.

Signed-off-by: Bart Massey <bart@cs.pdx.edu>
---
 notmuch-client.h |    7 +++++++
 notmuch-config.c |   47 +++++++++++++++++++++++++++++++++++++++++++++--
 notmuch-new.c    |   35 +++++++++++++++++++++++++++++++----
 3 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/notmuch-client.h b/notmuch-client.h
index 77766de..191988c 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -146,6 +146,13 @@ notmuch_config_set_database_path (notmuch_config_t *config,
 				  const char *database_path);
 
 const char *
+notmuch_config_get_filename_regex (notmuch_config_t *config);
+
+void
+notmuch_config_set_filename_regex (notmuch_config_t *config,
+				  const char *filename_regex);
+
+const char *
 notmuch_config_get_user_name (notmuch_config_t *config);
 
 void
diff --git a/notmuch-config.c b/notmuch-config.c
index 95430db..4189f03 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -31,11 +31,22 @@ static const char toplevel_config_comment[] =
 static const char database_config_comment[] =
     " Database configuration\n"
     "\n"
-    " The only value supported here is 'path' which should be the top-level\n"
+    " The value 'path' should be the top-level\n"
     " directory where your mail currently exists and to where mail will be\n"
     " delivered in the future. Files should be individual email messages.\n"
     " Notmuch will store its database within a sub-directory of the path\n"
-    " configured here named \".notmuch\".\n";
+    " configured here named \".notmuch\".\n"
+    "\n"
+    " The optional value 'filename_pattern' should be\n"
+    " a POSIX regular expression matching only those\n"
+    " filenames that will be checked for email\n"
+    " messages.  The match is against the last\n"
+    " component of the pathname only.  Anchors may be\n"
+    " used, and probably should be.  Typically, this\n"
+    " is used to match only files whose name is a\n"
+    " number ala MH, or to match only files in\n"
+    " standard maildir format.  The default pattern\n"
+    " matches anything.\n";
 
 static const char user_config_comment[] =
     " User configuration\n"
@@ -58,6 +69,7 @@ struct _notmuch_config {
     GKeyFile *key_file;
 
     char *database_path;
+    char *filename_regex;
     char *user_name;
     char *user_primary_email;
     char **user_other_email;
@@ -151,6 +163,8 @@ get_username_from_passwd_file (void *ctx)
  *
  *	database_path:		$HOME/mail
  *
+ *	filename_pattern:	.*
+ *
  *	user_name:		From /etc/passwd
  *
  *	user_primary_mail: 	$EMAIL variable if set, otherwise
@@ -195,6 +209,7 @@ notmuch_config_open (void *ctx,
     config->key_file = g_key_file_new ();
 
     config->database_path = NULL;
+    config->filename_regex = NULL;
     config->user_name = NULL;
     config->user_primary_email = NULL;
     config->user_other_email = NULL;
@@ -354,6 +369,34 @@ notmuch_config_set_database_path (notmuch_config_t *config,
 }
 
 const char *
+notmuch_config_get_filename_regex (notmuch_config_t *config)
+{
+    char *regex;
+
+    if (config->filename_regex == NULL) {
+	regex = g_key_file_get_string (config->key_file,
+				      "database", "filename_pattern", NULL);
+	if (regex) {
+	    config->filename_regex = talloc_strdup (config, regex);
+	    free (regex);
+	}
+    }
+
+    return config->filename_regex;
+}
+
+void
+notmuch_config_set_filename_regex (notmuch_config_t *config,
+				   const char *filename_regex)
+{
+    g_key_file_set_string (config->key_file,
+			   "database", "filename_pattern", filename_regex);
+
+    talloc_free (config->filename_regex);
+    config->filename_regex = NULL;
+}
+
+const char *
 notmuch_config_get_user_name (notmuch_config_t *config)
 {
     char *name;
diff --git a/notmuch-new.c b/notmuch-new.c
index f25c71f..531f9a3 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -21,6 +21,8 @@
 #include "notmuch-client.h"
 
 #include <unistd.h>
+#include <sys/types.h>
+#include <regex.h>
 
 typedef struct _filename_node {
     char *filename;
@@ -207,6 +209,7 @@ _entries_resemble_maildir (struct dirent **entries, int count)
 static notmuch_status_t
 add_files_recursive (notmuch_database_t *notmuch,
 		     const char *path,
+		     const regex_t *maybe_regex,
 		     add_files_state_t *state)
 {
     DIR *dir = NULL;
@@ -302,7 +305,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 	}
 
 	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
-	status = add_files_recursive (notmuch, next, state);
+	status = add_files_recursive (notmuch, next, maybe_regex, state);
 	if (status && ret == NOTMUCH_STATUS_SUCCESS)
 	    ret = status;
 	talloc_free (next);
@@ -389,7 +392,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 	}
 
 	/* We're now looking at a regular file that doesn't yet exist
-	 * in the database, so add it. */
+	 * in the database. */
 	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
 
 	state->processed_files++;
@@ -407,6 +410,14 @@ add_files_recursive (notmuch_database_t *notmuch,
 	    fflush (stdout);
 	}
 
+	/* Check against the regex (if any) for valid mail
+	 * file names and bail on failure */
+	if (maybe_regex) {
+	    status = regexec(maybe_regex, entry->d_name, 0, 0, 0);
+	    if (status)
+		goto CLEANUP;
+	}
+
 	status = notmuch_database_add_message (notmuch, next, &message);
 	switch (status) {
 	/* success */
@@ -445,6 +456,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 	    message = NULL;
 	}
 
+    CLEANUP:
 	if (do_add_files_print_progress) {
 	    do_add_files_print_progress = 0;
 	    add_files_print_progress (state);
@@ -509,6 +521,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 static notmuch_status_t
 add_files (notmuch_database_t *notmuch,
 	   const char *path,
+	   const regex_t *maybe_regex,
 	   add_files_state_t *state)
 {
     notmuch_status_t status;
@@ -546,7 +559,7 @@ add_files (notmuch_database_t *notmuch,
 	return NOTMUCH_STATUS_FILE_ERROR;
     }
 
-    status = add_files_recursive (notmuch, path, state);
+    status = add_files_recursive (notmuch, path, maybe_regex, state);
 
     if (timer_is_active) {
 	/* Now stop the timer. */
@@ -713,6 +726,9 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
     int ret = 0;
     struct stat st;
     const char *db_path;
+    const char *filename_regex;
+    regex_t regex;
+    const regex_t *maybe_regex = 0;
     char *dot_notmuch_path;
     struct sigaction action;
     _filename_node_t *f;
@@ -738,6 +754,17 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
 
     db_path = notmuch_config_get_database_path (config);
 
+    filename_regex = notmuch_config_get_filename_regex (config);
+    if (filename_regex) {
+	status = regcomp(&regex, filename_regex, REG_EXTENDED | REG_NOSUB);
+	if (status) {
+	    fprintf (stderr, "Note: Ignoring bad filename_pattern "
+		     "in config file: %s\n", filename_regex);
+	} else {
+	    maybe_regex = &regex;
+	}
+    }
+
     dot_notmuch_path = talloc_asprintf (ctx, "%s/%s", db_path, ".notmuch");
 
     if (stat (dot_notmuch_path, &st)) {
@@ -791,7 +818,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
     add_files_state.removed_files = _filename_list_create (ctx);
     add_files_state.removed_directories = _filename_list_create (ctx);
 
-    ret = add_files (notmuch, db_path, &add_files_state);
+    ret = add_files (notmuch, db_path, maybe_regex, &add_files_state);
 
     removed_files = 0;
     renamed_files = 0;
-- 
1.6.6.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] Added mail directory filename pattern support.
  2010-02-22 20:07 [PATCH] Added mail directory filename pattern support Bart Massey
@ 2010-02-23  6:15 ` Stewart Smith
  2010-02-23 21:31   ` Bart Massey
  0 siblings, 1 reply; 3+ messages in thread
From: Stewart Smith @ 2010-02-23  6:15 UTC (permalink / raw)
  To: Bart Massey; +Cc: Notmuch Mail

On Mon, Feb 22, 2010 at 12:07:31PM -0800, Bart Massey wrote:
> Typically, the filenames in a mail directory that actually
> contain mail obey some specific format.  For example, in my
> MH email directory, all mail filenames consist only of
> digits.
> 
> This patch adds support for a config file variable
> "filename_pattern" which maybe set to a regex used to filter
> only valid mail filenames when scanning.  Effective use of
> filename_pattern cuts down on the noise from notmuch, and
> may speed it up in some cases.

What about the other way around?

e.g. if anybody has ever pointed Evolution at a Maildir, you get a
bunch of Maildir-name.ev-summary and .ev-summary-meta and .ibex.index
and whatever.

A default list of ignored patterns would be pretty easy to come up with. 

-- 
Stewart Smith

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Added mail directory filename pattern support.
  2010-02-23  6:15 ` Stewart Smith
@ 2010-02-23 21:31   ` Bart Massey
  0 siblings, 0 replies; 3+ messages in thread
From: Bart Massey @ 2010-02-23 21:31 UTC (permalink / raw)
  To: Stewart Smith; +Cc: Notmuch Mail

In message <20100223061542.GR17878@flamingspork.com> you wrote:
> On Mon, Feb 22, 2010 at 12:07:31PM -0800, Bart Massey wrote:
> > Typically, the filenames in a mail directory that actually
> > contain mail obey some specific format.  For example, in my
> > MH email directory, all mail filenames consist only of
> > digits.
> > 
> > This patch adds support for a config file variable
> > "filename_pattern" which maybe set to a regex used to filter
> > only valid mail filenames when scanning.  Effective use of
> > filename_pattern cuts down on the noise from notmuch, and
> > may speed it up in some cases.
> 
> What about the other way around?
> 
> e.g. if anybody has ever pointed Evolution at a Maildir, you get a
> bunch of Maildir-name.ev-summary and .ev-summary-meta and .ibex.index
> and whatever.
> 
> A default list of ignored patterns would be pretty easy to come up with. 

I'm not sure I understand your use case.  For the most common
cases, Maildir and MH, setting the appropriate
filename_pattern will ignore whatever garbage is lying
around the tree and just look at the mail messages.

Maybe my documentation is confusing?  By "filter only valid
mail filenames" I mean "accept only valid mail filenames", not
"filter out only valid mail filenames". :-)

Thanks much for any clarification you can provide.

    Bart Massey
    bart@cs.pdx.edu

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-02-23 21:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-22 20:07 [PATCH] Added mail directory filename pattern support Bart Massey
2010-02-23  6:15 ` Stewart Smith
2010-02-23 21:31   ` Bart Massey

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).