unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames.
@ 2009-11-22  0:57 Chris Wilson
  2009-11-27 13:23 ` Carl Worth
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2009-11-22  0:57 UTC (permalink / raw)
  To: notmuch

The majority of filenames will fit within PATH_MAX [4096] (because
that's a hard limit imposed by the filesystems) so we can avoid an
allocation per lookup and thereby eliminate a large proportion of the
overhead of scanning a maildir.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 notmuch-new.c |   75 ++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 0dd2784..13559d1 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -107,6 +107,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 		     add_files_state_t *state)
 {
     DIR *dir = NULL;
+    char buf[4096];
     struct dirent *entry = NULL;
     char *next = NULL;
     time_t path_mtime, path_dbtime;
@@ -114,6 +115,7 @@ add_files_recursive (notmuch_database_t *notmuch,
     notmuch_message_t *message = NULL;
     struct dirent **namelist = NULL;
     int num_entries;
+    int path_len, dname_len;
 
     /* If we're told to, we bail out on encountering a read-only
      * directory, (with this being a clear clue from the user to
@@ -140,6 +142,12 @@ add_files_recursive (notmuch_database_t *notmuch,
 
     int i=0;
 
+    path_len = strlen (path);
+    if (path_len + 2 < (int) sizeof (buf)) {
+	memcpy (buf, path, path_len);
+	buf[path_len] = '/';
+    }
+
     while (!interrupted) {
 	if (i == num_entries)
 	    break;
@@ -164,37 +172,42 @@ add_files_recursive (notmuch_database_t *notmuch,
 	    continue;
 	}
 
-	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	dname_len = strlen (entry->d_name);
+	if (path_len + dname_len + 2 < (int) sizeof (buf)) {
+	    memcpy (buf + path_len + 1, entry->d_name, dname_len);
+	    buf[path_len + dname_len + 1] = '\0';
+	    next = buf;
+	} else {
+	    next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	}
 
 	if (stat (next, st)) {
 	    fprintf (stderr, "Error reading %s: %s\n",
 		     next, strerror (errno));
 	    ret = NOTMUCH_STATUS_FILE_ERROR;
-	    continue;
-	}
-
-	if (S_ISREG (st->st_mode)) {
-	    /* If the file hasn't been modified since the last
-	     * add_files, then we need not look at it. */
-	    if (path_dbtime == 0 || st->st_mtime > path_dbtime) {
-		state->processed_files++;
-
-		status = notmuch_database_add_message (notmuch, next, &message);
-		switch (status) {
-		    /* success */
+	} else {
+	    if (S_ISREG (st->st_mode)) {
+		/* If the file hasn't been modified since the last
+		 * add_files, then we need not look at it. */
+		if (path_dbtime == 0 || st->st_mtime > path_dbtime) {
+		    state->processed_files++;
+
+		    status = notmuch_database_add_message (notmuch, next, &message);
+		    switch (status) {
+			/* success */
 		    case NOTMUCH_STATUS_SUCCESS:
 			state->added_messages++;
 			tag_inbox_and_unread (message);
 			break;
-		    /* Non-fatal issues (go on to next file) */
+			/* Non-fatal issues (go on to next file) */
 		    case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
-		        /* Stay silent on this one. */
+			/* Stay silent on this one. */
 			break;
 		    case NOTMUCH_STATUS_FILE_NOT_EMAIL:
 			fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
 				 next);
 			break;
-		    /* Fatal issues. Don't process anymore. */
+			/* Fatal issues. Don't process anymore. */
 		    case NOTMUCH_STATUS_READONLY_DATABASE:
 		    case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
 		    case NOTMUCH_STATUS_OUT_OF_MEMORY:
@@ -210,25 +223,27 @@ add_files_recursive (notmuch_database_t *notmuch,
 		    case NOTMUCH_STATUS_LAST_STATUS:
 			INTERNAL_ERROR ("add_message returned unexpected value: %d",  status);
 			goto DONE;
-		}
+		    }
 
-		if (message) {
-		    notmuch_message_destroy (message);
-		    message = NULL;
-		}
+		    if (message) {
+			notmuch_message_destroy (message);
+			message = NULL;
+		    }
 
-		if (do_add_files_print_progress) {
-		    do_add_files_print_progress = 0;
-		    add_files_print_progress (state);
+		    if (do_add_files_print_progress) {
+			do_add_files_print_progress = 0;
+			add_files_print_progress (state);
+		    }
 		}
+	    } else if (S_ISDIR (st->st_mode)) {
+		status = add_files_recursive (notmuch, next, st, state);
+		if (status && ret == NOTMUCH_STATUS_SUCCESS)
+		    ret = status;
 	    }
-	} else if (S_ISDIR (st->st_mode)) {
-	    status = add_files_recursive (notmuch, next, st, state);
-	    if (status && ret == NOTMUCH_STATUS_SUCCESS)
-		ret = status;
 	}
 
-	talloc_free (next);
+	if (next != buf)
+	    talloc_free (next);
 	next = NULL;
     }
 
@@ -237,7 +252,7 @@ add_files_recursive (notmuch_database_t *notmuch,
 	ret = status;
 
   DONE:
-    if (next)
+    if (next != buf)
 	talloc_free (next);
     if (entry)
 	free (entry);
-- 
1.6.5.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames.
  2009-11-22  0:57 [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
@ 2009-11-27 13:23 ` Carl Worth
  2009-11-27 13:50   ` [PATCH] notmuch-new: Check for non-fatal errors from stat() Chris Wilson
                     ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Carl Worth @ 2009-11-27 13:23 UTC (permalink / raw)
  To: Chris Wilson, notmuch

On Sun, 22 Nov 2009 00:57:10 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> The majority of filenames will fit within PATH_MAX [4096] (because
> that's a hard limit imposed by the filesystems) so we can avoid an
> allocation per lookup and thereby eliminate a large proportion of the
> overhead of scanning a maildir.

Hi Chris,

I *know* I composed a reply to this message earlier, but apparently
you're right that it never went out. (*sigh*---if only I had a reliable
mail client[*]).

  [*] I tried and tried to figure out how to get gnus to save an Fcc (a
      file copy of all outgoing messages), and failed to configure the
      various "fake newsgroup things" that gnus wanted for me to be able
      to do this. I also failed to get message-mode to do an Fcc. I
      settled instead for a Bcc to myself on all messages, deciding that
      it would be nice to see that mail actually went *out* and not just
      that I thought I sent it. Maybe that failed here, I don't know.

      The other piece I want is for unsent mail drafts to automatically
      be saved, and for notmuch to prompt me to continue with a draft if
      I start composing a new message while a draft is
      around. Currently, I can save a draft while composing in
      message-mode with "C-x C-s" but the big bug here is that the
      drafts never get deleted when I send the message, so I can't tell
      unfinished drafts apart from completed-and-sent messages.

Anyway, on to the promised review of the patch:

The basic idea of this I really like, of course. Thanks for helping to
improve the efficiency of notmuch. But this part I don't:

> -	    continue;
> -	}
> -
> -	if (S_ISREG (st->st_mode)) {
> -	    /* If the file hasn't been modified since the last
> -	     * add_files, then we need not look at it. */
> -	    if (path_dbtime == 0 || st->st_mtime > path_dbtime) {
> -		state->processed_files++;
> -
> -		status = notmuch_database_add_message (notmuch, next, &message);
> -		switch (status) {
> -		    /* success */
> +	} else {

It's true that in a former life, one of your primary jobs was to clean
up after me, especially cleaning up things like memory leaks on error
paths. But in my new talloc-enabled world, I'm quite happy to keep
cleaner, easier-to-understand code for the price of just having a
talloced object live slightly longer on an error path.

We do still have to be careful that we don't let such object accumulate
without bounds in some error case, and that they don't lock up important
resources. But when it's just a matter of a dozen bytes or so, talloced
into the parent's context which is going to be freed in the error path
above, then I'm much happier to allow for this transient "leak" rather
than convoluting the code with more cleanup gotos.

One might argue that the error-cleanup goto is a common and
well-understood idiom, so that it's not bad to have. The problem I have
with it is how much work it is to verify. If I'm reading one line of
code in the middle of a function that's testing for an error case and
handling it with goto, then I need to:

	1. Verify this condition, and that a return value variable gets
	   set.

	2. Check down at the end of the function to ensure the correct
	   objects are freed and the correct return value is returned.

	3. Check back up at the beginning of the function to ensure the
	   relevant objects are initialized to NULL.

And beyond verification, one has to code in these three places
simultaneously as well.

Meanwhile, by taking advantage of talloc like I did in the original
version of this code, an error return becomes a much more local decision
and is much simpler to code.

Chris, I'd be interested to hear any thoughts you have on this pattern.

-Carl

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] notmuch-new: Check for non-fatal errors from stat()
  2009-11-27 13:23 ` Carl Worth
@ 2009-11-27 13:50   ` Chris Wilson
  2009-11-28  5:37     ` Carl Worth
  2009-11-27 14:17   ` [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
  2009-11-28 17:38   ` Archiving outgoing email in Gnus (was Re: notmuch-new: Eliminate tallocs whilst construct filenames.) Adam Sjøgren
  2 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2009-11-27 13:50 UTC (permalink / raw)
  To: notmuch

Currently we assume that all errors on stat() a dname is fatal (but
continue anyway and report the error at the end). However, some errors
reported by stat() such as a missing file or insufficient privilege,
we can simply ignore and skip the file. For the others, such as a fault
(unlikely!) or out-of-memory, we handle like the other fatal errors by
jumping to the end.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 notmuch-new.c |   13 ++++++++++++-
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 3cde3a7..71224c5 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -168,10 +168,21 @@ add_files_recursive (notmuch_database_t *notmuch,
 	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
 
 	if (stat (next, st)) {
+	    int err = errno;
+
+	    switch (err) {
+	    case ENOENT:
+		/* The file was removed between scandir and now... */
+	    case EPERM:
+	    case EACCES:
+		/* We can't read this file so don't add it to the cache. */
+		continue;
+	    }
+
 	    fprintf (stderr, "Error reading %s: %s\n",
 		     next, strerror (errno));
 	    ret = NOTMUCH_STATUS_FILE_ERROR;
-	    continue;
+	    goto DONE;
 	}
 
 	if (S_ISREG (st->st_mode)) {
-- 
1.6.5.3

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames.
  2009-11-27 13:23 ` Carl Worth
  2009-11-27 13:50   ` [PATCH] notmuch-new: Check for non-fatal errors from stat() Chris Wilson
@ 2009-11-27 14:17   ` Chris Wilson
  2009-11-28  5:41     ` Carl Worth
  2009-11-28 17:38   ` Archiving outgoing email in Gnus (was Re: notmuch-new: Eliminate tallocs whilst construct filenames.) Adam Sjøgren
  2 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2009-11-27 14:17 UTC (permalink / raw)
  To: Carl Worth; +Cc: notmuch

Excerpts from Carl Worth's message of Fri Nov 27 13:23:06 +0000 2009:
> On Sun, 22 Nov 2009 00:57:10 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > The majority of filenames will fit within PATH_MAX [4096] (because
> > that's a hard limit imposed by the filesystems) so we can avoid an
> > allocation per lookup and thereby eliminate a large proportion of the
> > overhead of scanning a maildir.
> 
> Hi Chris,
> 
> I *know* I composed a reply to this message earlier, but apparently
> you're right that it never went out. (*sigh*---if only I had a reliable
> mail client[*]).

I hear there's one called sup... ;-)

> Anyway, on to the promised review of the patch:
> 
> The basic idea of this I really like, of course. Thanks for helping to
> improve the efficiency of notmuch. But this part I don't:

It's a bit outdated now, the impact of the asprintf overhead is lost
with the introduction of the scandir. I'm sure the profiles will
indicate something else to improve beyond xapian...

> One might argue that the error-cleanup goto is a common and
> well-understood idiom, so that it's not bad to have. The problem I have
> with it is how much work it is to verify. If I'm reading one line of
> code in the middle of a function that's testing for an error case and
> handling it with goto, then I need to:
> 
>     1. Verify this condition, and that a return value variable gets
>        set.
> 
>     2. Check down at the end of the function to ensure the correct
>        objects are freed and the correct return value is returned.
> 
>     3. Check back up at the beginning of the function to ensure the
>        relevant objects are initialized to NULL.
> 
> And beyond verification, one has to code in these three places
> simultaneously as well.
> 
> Meanwhile, by taking advantage of talloc like I did in the original
> version of this code, an error return becomes a much more local decision
> and is much simpler to code.

The issue I see with the "error, continue" pattern is that we are in
danger of not reporting the first error but the last one. The common
practice is abort on error and cleanup, and this single instance is
inconsistent with the rest of the error handling everywhere else in
notmuch-new.c. The argument to counter your 3 points is the unified
unwind approach where there is just a single exit path that handles
both error and normal returns. (You always have to set the appropriate
error value whether you continue or abort.)

The advantage of talloc is that it provides a convenient allocation
context that not only groups object by their associated lifetimes, but
provides a single point of access for reaping allocations on unwind. I
don't see how talloc affects the decision process on how to actually
handle errors, but it does make it easier to cleanup afterwards.

Is notmuch ready for fault-injection yet? Maybe once you have a nasty
testsuite...
-ickle
-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] notmuch-new: Check for non-fatal errors from stat()
  2009-11-27 13:50   ` [PATCH] notmuch-new: Check for non-fatal errors from stat() Chris Wilson
@ 2009-11-28  5:37     ` Carl Worth
  0 siblings, 0 replies; 8+ messages in thread
From: Carl Worth @ 2009-11-28  5:37 UTC (permalink / raw)
  To: Chris Wilson, notmuch

[-- Attachment #1: Type: text/plain, Size: 490 bytes --]

On Fri, 27 Nov 2009 13:50:11 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Currently we assume that all errors on stat() a dname is fatal (but
> continue anyway and report the error at the end). However, some errors
> reported by stat() such as a missing file or insufficient privilege,
> we can simply ignore and skip the file. For the others, such as a fault
> (unlikely!) or out-of-memory, we handle like the other fatal errors by
> jumping to the end.

Thanks! Pushed.

-Carl

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames.
  2009-11-27 14:17   ` [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
@ 2009-11-28  5:41     ` Carl Worth
  2009-11-28  5:58       ` Jeffrey Ollie
  0 siblings, 1 reply; 8+ messages in thread
From: Carl Worth @ 2009-11-28  5:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: notmuch

[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]

On Fri, 27 Nov 2009 14:17:02 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > I *know* I composed a reply to this message earlier, but apparently
> > you're right that it never went out. (*sigh*---if only I had a reliable
> > mail client[*]).
> 
> I hear there's one called sup... ;-)

Heh. But seriously, I hit a lot of crashes with sup, and that invariably
led to *lots* of lost tag changes. I'm willing to live with lots of
Xapian-defect-250 pain right now to avoid that lossage.

> The issue I see with the "error, continue" pattern is that we are in
> danger of not reporting the first error but the last one.

OK. That would be a problem, yes.

> Is notmuch ready for fault-injection yet? Maybe once you have a nasty
> testsuite...

It's not "ready" in the sense that there is going to be a huge series of
fixes that fault-injection will find. But it's definitely "ready" in
the sense that I want to start doing this kind of testing.

But yes, we need a test suite.

Oh, and we'll also need to deal with remaining glib usage inside of
notmuch, (and inside of GMime as well), before we can do good testing
for memory-fault injection. Maybe what we'll end up with is a patch to
de-glib-ify GMime? I'm not sure.

-Carl

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames.
  2009-11-28  5:41     ` Carl Worth
@ 2009-11-28  5:58       ` Jeffrey Ollie
  0 siblings, 0 replies; 8+ messages in thread
From: Jeffrey Ollie @ 2009-11-28  5:58 UTC (permalink / raw)
  To: Carl Worth; +Cc: notmuch

On Fri, Nov 27, 2009 at 11:41 PM, Carl Worth <cworth@cworth.org> wrote:
>
> But yes, we need a test suite.

I have zero experience, but Check[1] looks interesting.

> Oh, and we'll also need to deal with remaining glib usage inside of
> notmuch, (and inside of GMime as well), before we can do good testing
> for memory-fault injection. Maybe what we'll end up with is a patch to
> de-glib-ify GMime? I'm not sure.

Rather than de-glib-ify'ing GMime wouldn't it be better to find a non
glib-based library for MIME processing?  I do most of my development
in Python these days so I don't know of any examples.  My googling
turned up several C++ libraries or c-client and I wouldn't want to go
down either of those paths.

[1] http://check.sourceforge.net/

-- 
Jeff Ollie

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Archiving outgoing email in Gnus (was Re: notmuch-new: Eliminate tallocs whilst construct filenames.)
  2009-11-27 13:23 ` Carl Worth
  2009-11-27 13:50   ` [PATCH] notmuch-new: Check for non-fatal errors from stat() Chris Wilson
  2009-11-27 14:17   ` [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
@ 2009-11-28 17:38   ` Adam Sjøgren
  2 siblings, 0 replies; 8+ messages in thread
From: Adam Sjøgren @ 2009-11-28 17:38 UTC (permalink / raw)
  To: notmuch

On Fri, 27 Nov 2009 05:23:06 -0800, Carl wrote:

>   [*] I tried and tried to figure out how to get gnus to save an Fcc (a
>       file copy of all outgoing messages), and failed to configure the
>       various "fake newsgroup things" that gnus wanted for me to be able
>       to do this.

I use this simple recipe for archiving all my outgoing emails and
news-articles in nnml+archive:{news,mail}-{year} in Gnus:

  ; Define nnml+archive: for archiving emails and news in ~/Mail/archive:
  (setq gnus-message-archive-method '(nnml "archive"
                                           (nnml-directory "~/Mail/archive")
                                           (nnml-active-file "~/Mail/archive/active")
                                           (nnml-get-new-mail nil)
                                           (nnml-inhibit-expiry t)))

  ; Tell Gnus to archive outgoing articles/emails automatically:
  (setq gnus-message-archive-group
        '((if (message-news-p)
              (concat "news-" (format-time-string "%Y"))
            (concat "mail-" (format-time-string "%Y")))))

  ; Mark gcc'ed (archive) as read:
  (setq gnus-gcc-mark-as-read t)

>       The other piece I want is for unsent mail drafts to automatically
>       be saved, and for notmuch to prompt me to continue with a draft if
>       I start composing a new message while a draft is
>       around. Currently, I can save a draft while composing in
>       message-mode with "C-x C-s" but the big bug here is that the
>       drafts never get deleted when I send the message, so I can't tell
>       unfinished drafts apart from completed-and-sent messages.

(This works for me in Gnus: automatic saving, deletion when sending.)


  Best regards,

    Adam

-- 
 "I'll dye without my hair!"                                  Adam Sjøgren
                                                         asjo@koldfront.dk

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-11-28 17:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-11-22  0:57 [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
2009-11-27 13:23 ` Carl Worth
2009-11-27 13:50   ` [PATCH] notmuch-new: Check for non-fatal errors from stat() Chris Wilson
2009-11-28  5:37     ` Carl Worth
2009-11-27 14:17   ` [PATCH] notmuch-new: Eliminate tallocs whilst construct filenames Chris Wilson
2009-11-28  5:41     ` Carl Worth
2009-11-28  5:58       ` Jeffrey Ollie
2009-11-28 17:38   ` Archiving outgoing email in Gnus (was Re: notmuch-new: Eliminate tallocs whilst construct filenames.) Adam Sjøgren

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).