unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH] Handle message renames in mail spool
@ 2009-11-23  1:13 Mikhail Gusarov
  2009-11-26 15:40 ` Carl Worth
  0 siblings, 1 reply; 5+ messages in thread
From: Mikhail Gusarov @ 2009-11-23  1:13 UTC (permalink / raw)
  To: notmuch

In order to handle message renames the following changes were deemed necessary:

* Mtime check on individual files was disabled. As files may be moved around
without changing their mtime, it's necessary to parse them even if they appear
old in case old message was moved. mtime check on directories was kept as moving
files changes mtime of directory.

* If message being parsed is already found in database under different path,
then this message is considered to be moved, path is updated in database and
this file does not undergo further processing.

Note that after applying this patch notmuch still does not handle copying files
(which is harmless, database will point to the last copy of message found during
'notmuch new') and deleting files (which is more serious, as dangling entries
will show up in searches).

Signed-off-by: Mikhail Gusarov <dottedmag@dottedmag.net>
---
 lib/database.cc |   21 +++++++-----
 notmuch-new.c   |   92 ++++++++++++++++++++++++++----------------------------
 2 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 2c90019..d88efbe 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -990,20 +990,23 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
 	    _notmuch_message_set_filename (message, filename);
 	    _notmuch_message_add_term (message, "type", "mail");
+
+	    ret = _notmuch_database_link_message (notmuch, message, message_file);
+	    if (ret)
+		goto DONE;
+
+	    date = notmuch_message_file_get_header (message_file, "date");
+	    _notmuch_message_set_date (message, date);
+
+	    _notmuch_message_index_file (message, filename);
+	} else if (strcmp(notmuch_message_get_filename(message), filename)) {
+	    /* Message file has been moved/renamed */
+	    _notmuch_message_set_filename (message, filename);
 	} else {
 	    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
 	    goto DONE;
 	}
 
-	ret = _notmuch_database_link_message (notmuch, message, message_file);
-	if (ret)
-	    goto DONE;
-
-	date = notmuch_message_file_get_header (message_file, "date");
-	_notmuch_message_set_date (message, date);
-
-	_notmuch_message_index_file (message, filename);
-
 	_notmuch_message_sync (message);
     } catch (const Xapian::Error &error) {
 	fprintf (stderr, "A Xapian exception occurred adding message: %s.\n",
diff --git a/notmuch-new.c b/notmuch-new.c
index 0dd2784..d16679c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -174,54 +174,50 @@ add_files_recursive (notmuch_database_t *notmuch,
 	}
 
 	if (S_ISREG (st->st_mode)) {
-	    /* If the file hasn't been modified since the last
-	     * add_files, then we need not look at it. */
-	    if (path_dbtime == 0 || st->st_mtime > path_dbtime) {
-		state->processed_files++;
-
-		status = notmuch_database_add_message (notmuch, next, &message);
-		switch (status) {
-		    /* success */
-		    case NOTMUCH_STATUS_SUCCESS:
-			state->added_messages++;
-			tag_inbox_and_unread (message);
-			break;
-		    /* Non-fatal issues (go on to next file) */
-		    case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
-		        /* Stay silent on this one. */
-			break;
-		    case NOTMUCH_STATUS_FILE_NOT_EMAIL:
-			fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
-				 next);
-			break;
-		    /* Fatal issues. Don't process anymore. */
-		    case NOTMUCH_STATUS_READONLY_DATABASE:
-		    case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
-		    case NOTMUCH_STATUS_OUT_OF_MEMORY:
-			fprintf (stderr, "Error: %s. Halting processing.\n",
-				 notmuch_status_to_string (status));
-			ret = status;
-			goto DONE;
-		    default:
-		    case NOTMUCH_STATUS_FILE_ERROR:
-		    case NOTMUCH_STATUS_NULL_POINTER:
-		    case NOTMUCH_STATUS_TAG_TOO_LONG:
-		    case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
-		    case NOTMUCH_STATUS_LAST_STATUS:
-			INTERNAL_ERROR ("add_message returned unexpected value: %d",  status);
-			goto DONE;
-		}
-
-		if (message) {
-		    notmuch_message_destroy (message);
-		    message = NULL;
-		}
-
-		if (do_add_files_print_progress) {
-		    do_add_files_print_progress = 0;
-		    add_files_print_progress (state);
-		}
-	    }
+            state->processed_files++;
+
+            status = notmuch_database_add_message (notmuch, next, &message);
+            switch (status) {
+                /* success */
+                case NOTMUCH_STATUS_SUCCESS:
+                    state->added_messages++;
+                    tag_inbox_and_unread (message);
+                    break;
+		/* Non-fatal issues (go on to next file) */
+                case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
+                    /* Stay silent on this one. */
+                    break;
+                case NOTMUCH_STATUS_FILE_NOT_EMAIL:
+                    fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
+                             next);
+                    break;
+		/* Fatal issues. Don't process anymore. */
+                case NOTMUCH_STATUS_READONLY_DATABASE:
+                case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
+                case NOTMUCH_STATUS_OUT_OF_MEMORY:
+                    fprintf (stderr, "Error: %s. Halting processing.\n",
+                             notmuch_status_to_string (status));
+                    ret = status;
+                    goto DONE;
+                default:
+                case NOTMUCH_STATUS_FILE_ERROR:
+                case NOTMUCH_STATUS_NULL_POINTER:
+                case NOTMUCH_STATUS_TAG_TOO_LONG:
+                case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
+                case NOTMUCH_STATUS_LAST_STATUS:
+                    INTERNAL_ERROR ("add_message returned unexpected value: %d",  status);
+                    goto DONE;
+            }
+
+            if (message) {
+                notmuch_message_destroy (message);
+                message = NULL;
+            }
+
+            if (do_add_files_print_progress) {
+                do_add_files_print_progress = 0;
+                add_files_print_progress (state);
+            }
 	} else if (S_ISDIR (st->st_mode)) {
 	    status = add_files_recursive (notmuch, next, st, state);
 	    if (status && ret == NOTMUCH_STATUS_SUCCESS)
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Handle message renames in mail spool
  2009-11-23  1:13 [PATCH] Handle message renames in mail spool Mikhail Gusarov
@ 2009-11-26 15:40 ` Carl Worth
  2009-11-26 16:38   ` Mikhail Gusarov
  0 siblings, 1 reply; 5+ messages in thread
From: Carl Worth @ 2009-11-26 15:40 UTC (permalink / raw)
  To: Mikhail Gusarov, notmuch

On Mon, 23 Nov 2009 07:13:25 +0600, Mikhail Gusarov <dottedmag@dottedmag.net> wrote:
> In order to handle message renames the following changes were deemed
> necessary:

Hi Mikhail,

I *really* want this patch in, since I think a lot of current users
would really benefit from it. I only see one big problem with it:

> Note that after applying this patch notmuch still does not handle copying files
> (which is harmless, database will point to the last copy of message found during
> 'notmuch new') and deleting files (which is more serious, as dangling entries
> will show up in searches).
...
> +	} else if (strcmp(notmuch_message_get_filename(message), filename)) {
> +	    /* Message file has been moved/renamed */
> +	    _notmuch_message_set_filename (message, filename);

With multiple copies of the same message being present in the mailstore
as different files, the code above will set the filename of the document
to each filename in turn, correct?

Now, I *think* that Xapian defect #250 doesn't hit us here
currently. But there was an old bug in Xapian that changing just the
data of a document would also rewrite all the terms. But we've also
talked recently about storing the filename as a term as well, and once
we add that, then this code would trigger defect #250 and cause a big
slowdown here.

So I think the code just needs to verify that the old filename no longer
exists before it changes anything in the database. And then if it does
still exist, we can get our NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID return
value back here.

>  	} else {
>  	    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
>  	    goto DONE;
>  	}

In this case, the return value is actually wrong. This is the case for
adding a message file that already exists in the database. I don't know
that we have any users that care about distinguishing this, but if they
did, then DUPLICATE isn't right. I suggest returning
NOTMUCH_STATUS_SUCCESS here.

-Carl

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Handle message renames in mail spool
  2009-11-26 15:40 ` Carl Worth
@ 2009-11-26 16:38   ` Mikhail Gusarov
  2009-11-26 18:23     ` Carl Worth
  0 siblings, 1 reply; 5+ messages in thread
From: Mikhail Gusarov @ 2009-11-26 16:38 UTC (permalink / raw)
  To: notmuch

[-- Attachment #1: Type: text/plain, Size: 349 bytes --]


Twas brillig at 07:40:15 26.11.2009 UTC-08 when cworth@cworth.org did gyre and gimble:

 CW> I *really* want this patch in, since I think a lot of current users
 CW> would really benefit from it. I only see one big problem with it:

Did you test the performance hit caused by disabling mtime checks?

-- 
  http://fossarchy.blogspot.com/

[-- Attachment #2: Type: application/pgp-signature, Size: 834 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Handle message renames in mail spool
  2009-11-26 16:38   ` Mikhail Gusarov
@ 2009-11-26 18:23     ` Carl Worth
  0 siblings, 0 replies; 5+ messages in thread
From: Carl Worth @ 2009-11-26 18:23 UTC (permalink / raw)
  To: Mikhail Gusarov, notmuch

On Thu, 26 Nov 2009 22:38:49 +0600, Mikhail Gusarov <dottedmag@dottedmag.net> wrote:
> 
> Twas brillig at 07:40:15 26.11.2009 UTC-08 when cworth@cworth.org did gyre and gimble:
> 
>  CW> I *really* want this patch in, since I think a lot of current users
>  CW> would really benefit from it. I only see one big problem with it:
> 
> Did you test the performance hit caused by disabling mtime checks?

Not yet, no.

I only had a moment earlier---just long enough to review the patch by
reading it. I haven't actually tested it at all yet.

-Carl

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] Handle message renames in mail spool
@ 2009-11-26 20:23 Mikhail Gusarov
  0 siblings, 0 replies; 5+ messages in thread
From: Mikhail Gusarov @ 2009-11-26 20:23 UTC (permalink / raw)
  To: notmuch

In order to handle message renames the following changes were deemed necessary:

* Mtime check on individual files was disabled. As files may be moved around
without changing their mtime, it's necessary to parse them even if they appear
old in case old message was moved. mtime check on directories was kept as moving
files changes mtime of directory.

* If message being parsed is already found in database under different path,
then this message is considered to be moved, path is updated in database and
this file does not undergo further processing.

Note that after applying this patch notmuch still does not handle copying files
(which is harmless, database will point to the last copy of message found during
'notmuch new') and deleting files (which is more serious, as dangling entries
will show up in searches).

Signed-off-by: Mikhail Gusarov <dottedmag@dottedmag.net>
---
 lib/database.cc |   32 +++++++++++++------
 notmuch-new.c   |   92 ++++++++++++++++++++++++++----------------------------
 2 files changed, 66 insertions(+), 58 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 2c90019..257c0b8 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -990,19 +990,31 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
 	    _notmuch_message_set_filename (message, filename);
 	    _notmuch_message_add_term (message, "type", "mail");
-	} else {
-	    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
-	    goto DONE;
-	}
 
-	ret = _notmuch_database_link_message (notmuch, message, message_file);
-	if (ret)
-	    goto DONE;
+	    ret = _notmuch_database_link_message (notmuch, message, message_file);
+	    if (ret)
+		goto DONE;
 
-	date = notmuch_message_file_get_header (message_file, "date");
-	_notmuch_message_set_date (message, date);
+	    date = notmuch_message_file_get_header (message_file, "date");
+	    _notmuch_message_set_date (message, date);
 
-	_notmuch_message_index_file (message, filename);
+	    _notmuch_message_index_file (message, filename);
+	} else {
+	    const char *old_filename = notmuch_message_get_filename (message);
+	    if (strcmp (old_filename, filename) == 0) {
+		/* We have already seen it */
+		goto DONE;
+	    } else {
+		if (access (old_filename, R_OK) == 0) {
+		    /* old_filename still exists, we've got a duplicate */
+		    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
+		    goto DONE;
+		} else {
+		    /* Message file has been moved/renamed */
+		    _notmuch_message_set_filename (message, filename);
+		}
+	    }
+	}
 
 	_notmuch_message_sync (message);
     } catch (const Xapian::Error &error) {
diff --git a/notmuch-new.c b/notmuch-new.c
index 0dd2784..d16679c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -174,54 +174,50 @@ add_files_recursive (notmuch_database_t *notmuch,
 	}
 
 	if (S_ISREG (st->st_mode)) {
-	    /* If the file hasn't been modified since the last
-	     * add_files, then we need not look at it. */
-	    if (path_dbtime == 0 || st->st_mtime > path_dbtime) {
-		state->processed_files++;
-
-		status = notmuch_database_add_message (notmuch, next, &message);
-		switch (status) {
-		    /* success */
-		    case NOTMUCH_STATUS_SUCCESS:
-			state->added_messages++;
-			tag_inbox_and_unread (message);
-			break;
-		    /* Non-fatal issues (go on to next file) */
-		    case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
-		        /* Stay silent on this one. */
-			break;
-		    case NOTMUCH_STATUS_FILE_NOT_EMAIL:
-			fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
-				 next);
-			break;
-		    /* Fatal issues. Don't process anymore. */
-		    case NOTMUCH_STATUS_READONLY_DATABASE:
-		    case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
-		    case NOTMUCH_STATUS_OUT_OF_MEMORY:
-			fprintf (stderr, "Error: %s. Halting processing.\n",
-				 notmuch_status_to_string (status));
-			ret = status;
-			goto DONE;
-		    default:
-		    case NOTMUCH_STATUS_FILE_ERROR:
-		    case NOTMUCH_STATUS_NULL_POINTER:
-		    case NOTMUCH_STATUS_TAG_TOO_LONG:
-		    case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
-		    case NOTMUCH_STATUS_LAST_STATUS:
-			INTERNAL_ERROR ("add_message returned unexpected value: %d",  status);
-			goto DONE;
-		}
-
-		if (message) {
-		    notmuch_message_destroy (message);
-		    message = NULL;
-		}
-
-		if (do_add_files_print_progress) {
-		    do_add_files_print_progress = 0;
-		    add_files_print_progress (state);
-		}
-	    }
+            state->processed_files++;
+
+            status = notmuch_database_add_message (notmuch, next, &message);
+            switch (status) {
+                /* success */
+                case NOTMUCH_STATUS_SUCCESS:
+                    state->added_messages++;
+                    tag_inbox_and_unread (message);
+                    break;
+		/* Non-fatal issues (go on to next file) */
+                case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
+                    /* Stay silent on this one. */
+                    break;
+                case NOTMUCH_STATUS_FILE_NOT_EMAIL:
+                    fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
+                             next);
+                    break;
+		/* Fatal issues. Don't process anymore. */
+                case NOTMUCH_STATUS_READONLY_DATABASE:
+                case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
+                case NOTMUCH_STATUS_OUT_OF_MEMORY:
+                    fprintf (stderr, "Error: %s. Halting processing.\n",
+                             notmuch_status_to_string (status));
+                    ret = status;
+                    goto DONE;
+                default:
+                case NOTMUCH_STATUS_FILE_ERROR:
+                case NOTMUCH_STATUS_NULL_POINTER:
+                case NOTMUCH_STATUS_TAG_TOO_LONG:
+                case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
+                case NOTMUCH_STATUS_LAST_STATUS:
+                    INTERNAL_ERROR ("add_message returned unexpected value: %d",  status);
+                    goto DONE;
+            }
+
+            if (message) {
+                notmuch_message_destroy (message);
+                message = NULL;
+            }
+
+            if (do_add_files_print_progress) {
+                do_add_files_print_progress = 0;
+                add_files_print_progress (state);
+            }
 	} else if (S_ISDIR (st->st_mode)) {
 	    status = add_files_recursive (notmuch, next, st, state);
 	    if (status && ret == NOTMUCH_STATUS_SUCCESS)
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-11-26 20:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-11-23  1:13 [PATCH] Handle message renames in mail spool Mikhail Gusarov
2009-11-26 15:40 ` Carl Worth
2009-11-26 16:38   ` Mikhail Gusarov
2009-11-26 18:23     ` Carl Worth
  -- strict thread matches above, loose matches on Subject: below --
2009-11-26 20:23 Mikhail Gusarov

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).