unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: Jani Nikula <jani@nikula.org>
To: notmuch@notmuchmail.org
Subject: [RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files
Date: Fri, 15 Apr 2016 22:29:18 +0300	[thread overview]
Message-ID: <6e67ca7c973419213f2b9d47711b24cf7cb35d7a.1460748142.git.jani@nikula.org> (raw)
In-Reply-To: <cover.1460748142.git.jani@nikula.org>
In-Reply-To: <cover.1460748142.git.jani@nikula.org>

Split the scanning to files and subdirectories. Filter out ignored
files etc. in the scandir filter callback.

The end result is perhaps slightly easier to follow than before and it
should be easier to add clever ignore mechanisms like globbing in the
scandir filter callback, but it's not at all certain if this is worth
the trouble.

Some tests will fail due to changes in where files are ignored and
what debug logging is done; I didn't bother updating the logging or
the tests much at this point.
---
 notmuch-new.c | 275 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 148 insertions(+), 127 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 930cbbc9b86f..262895d466ae 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -20,6 +20,7 @@
 
 #include "notmuch-client.h"
 #include "tag-util.h"
+#include "scandir.h"
 
 #include <unistd.h>
 
@@ -151,9 +152,12 @@ generic_print_progress (const char *action, const char *object,
 }
 
 static int
-dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
+dirent_sort_strcmp_name (const void *_a, const void *_b)
 {
-    return strcmp ((*a)->d_name, (*b)->d_name);
+    char * const *a = _a;
+    char * const *b = _b;
+
+    return strcmp (*a, *b);
 }
 
 /* Return the type of a directory entry relative to path as a stat(2)
@@ -206,18 +210,14 @@ dirent_type (const char *path, const struct dirent *entry)
  * Return 1 if the directory looks like a Maildir and 0 otherwise.
  */
 static int
-_entries_resemble_maildir (const char *path, struct dirent **entries, int count)
+_entries_resemble_maildir (char **subdirs, int count)
 {
     int i, found = 0;
 
     for (i = 0; i < count; i++) {
-	if (dirent_type (path, entries[i]) != S_IFDIR)
-	    continue;
-
-	if (strcmp(entries[i]->d_name, "new") == 0 ||
-	    strcmp(entries[i]->d_name, "cur") == 0 ||
-	    strcmp(entries[i]->d_name, "tmp") == 0)
-	{
+	if (strcmp(subdirs[i], "new") == 0 ||
+	    strcmp(subdirs[i], "cur") == 0 ||
+	    strcmp(subdirs[i], "tmp") == 0) {
 	    found++;
 	    if (found == 3)
 		return 1;
@@ -241,6 +241,49 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state)
     return FALSE;
 }
 
+struct filter {
+    const char *path;
+    add_files_state_t *state;
+    int entry_type;
+};
+
+static int filter_fn (const struct dirent *entry, void *context)
+{
+    struct filter *filter = context;
+    int entry_type;
+
+    /* Ignore any files/directories the user has configured to
+     * ignore.  We do this before dirent_type both for performance
+     * and because we don't care if dirent_type fails on entries
+     * that are explicitly ignored.
+     */
+    if (_entry_in_ignore_list (entry->d_name, filter->state)) {
+	if (filter->state->debug)
+	    printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n",
+		    filter->path, entry->d_name);
+	return 0;
+    }
+
+    /* We only want to descend into directories (and symlinks to
+     * directories). */
+    entry_type = dirent_type (filter->path, entry);
+    if (entry_type == -1) {
+	/* Be pessimistic, e.g. so we don't lose lots of mail just
+	 * because a user broke a symlink. */
+	fprintf (stderr, "Error reading file %s/%s: %s\n",
+		 filter->path, entry->d_name, strerror (errno));
+	return -1;
+    }
+
+    if (entry_type != filter->entry_type)
+	return 0;
+
+    if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0)
+	return 0;
+
+    return 1;
+}
+
 /* Add a single file to the database. */
 static notmuch_status_t
 add_file (notmuch_database_t *notmuch, const char *filename,
@@ -345,18 +388,22 @@ add_files (notmuch_database_t *notmuch,
 	   const char *path,
 	   add_files_state_t *state)
 {
-    struct dirent *entry = NULL;
     char *next = NULL;
     time_t fs_mtime, db_mtime;
     notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
-    struct dirent **fs_entries = NULL;
-    int i, num_fs_entries = 0, entry_type;
+    char **fs_files = NULL, **fs_subdirs = NULL;
+    int num_fs_files = 0, num_fs_subdirs = 0;
+    int i;
     notmuch_directory_t *directory;
     notmuch_filenames_t *db_files = NULL;
     notmuch_filenames_t *db_subdirs = NULL;
     time_t stat_time;
     struct stat st;
     notmuch_bool_t is_maildir;
+    struct filter filter = {
+	.path = path,
+	.state = state,
+    };
 
     if (stat (path, &st)) {
 	fprintf (stderr, "Error reading directory %s: %s\n",
@@ -410,11 +457,11 @@ add_files (notmuch_database_t *notmuch,
 
     /* If the database knows about this directory, then we sort based
      * on strcmp to match the database sorting. */
-    num_fs_entries = scandir (path, &fs_entries, 0,
-			      directory ?
-			      dirent_sort_strcmp_name : NULL);
-
-    if (num_fs_entries == -1) {
+    filter.entry_type = S_IFDIR;
+    num_fs_subdirs = scandirx (path, &fs_subdirs, filter_fn,
+			       directory ? dirent_sort_strcmp_name : NULL,
+			       &filter);
+    if (num_fs_subdirs == -1) {
 	fprintf (stderr, "Error opening directory %s: %s\n",
 		 path, strerror (errno));
 	/* We consider this a fatal error because, if a user moved a
@@ -426,50 +473,20 @@ add_files (notmuch_database_t *notmuch,
     }
 
     /* Pass 1: Recurse into all sub-directories. */
-    is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries);
-
-    for (i = 0; i < num_fs_entries; i++) {
-	if (interrupted)
-	    break;
+    is_maildir = _entries_resemble_maildir (fs_subdirs, num_fs_subdirs);
 
-	entry = fs_entries[i];
+    for (i = 0; i < num_fs_subdirs && !interrupted; i++) {
+	const char *name = fs_subdirs[i];
 
-	/* Ignore any files/directories the user has configured to
-	 * ignore.  We do this before dirent_type both for performance
-	 * and because we don't care if dirent_type fails on entries
-	 * that are explicitly ignored.
+	/*
+	 * Ignore the .notmuch directory and any "tmp" directory that
+	 * appears within a maildir.
 	 */
-	if (_entry_in_ignore_list (entry->d_name, state)) {
-	    if (state->debug)
-		printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n",
-			path, entry->d_name);
+	if ((is_maildir && strcmp (name, "tmp") == 0) ||
+	    strcmp (name, ".notmuch") == 0)
 	    continue;
-	}
 
-	/* We only want to descend into directories (and symlinks to
-	 * directories). */
-	entry_type = dirent_type (path, entry);
-	if (entry_type == -1) {
-	    /* Be pessimistic, e.g. so we don't lose lots of mail just
-	     * because a user broke a symlink. */
-	    fprintf (stderr, "Error reading file %s/%s: %s\n",
-		     path, entry->d_name, strerror (errno));
-	    return NOTMUCH_STATUS_FILE_ERROR;
-	} else if (entry_type != S_IFDIR) {
-	    continue;
-	}
-
-	/* Ignore special directories to avoid infinite recursion.
-	 * Also ignore the .notmuch directory and any "tmp" directory
-	 * that appears within a maildir.
-	 */
-	if (strcmp (entry->d_name, ".") == 0 ||
-	    strcmp (entry->d_name, "..") == 0 ||
-	    (is_maildir && strcmp (entry->d_name, "tmp") == 0) ||
-	    strcmp (entry->d_name, ".notmuch") == 0)
-	    continue;
-
-	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	next = talloc_asprintf (notmuch, "%s/%s", path, name);
 	status = add_files (notmuch, next, state);
 	if (status) {
 	    ret = status;
@@ -498,27 +515,71 @@ add_files (notmuch_database_t *notmuch,
 	db_subdirs = notmuch_directory_get_child_directories (directory);
     }
 
-    /* Pass 2: Scan for new files, removed files, and removed directories. */
-    for (i = 0; i < num_fs_entries; i++)
-    {
-	if (interrupted)
-	    break;
+    /* Pass 1½: Scan for removed directories. */
+    for (i = 0; i < num_fs_subdirs && !interrupted; i++) {
+	const char *name = fs_subdirs[i];
 
-        entry = fs_entries[i];
+	/* Check if we've walked past any names in db_subdirs. If so,
+	 * these have been deleted. */
+	while (notmuch_filenames_valid (db_subdirs) &&
+	       strcmp (notmuch_filenames_get (db_subdirs), name) < 0) {
+	    char *absolute = talloc_asprintf (state->removed_directories,
+					      "%s/%s", path,
+					      notmuch_filenames_get (db_subdirs));
 
-	/* Ignore files & directories user has configured to be ignored */
-	if (_entry_in_ignore_list (entry->d_name, state)) {
 	    if (state->debug)
-		printf ("(D) add_files, pass 2: explicitly ignoring %s/%s\n",
-			path, entry->d_name);
-	    continue;
+		printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n",
+			absolute);
+
+	    _filename_list_add (state->removed_directories, absolute);
+
+	    notmuch_filenames_move_to_next (db_subdirs);
 	}
 
-	/* Check if we've walked past any names in db_files or
-	 * db_subdirs. If so, these have been deleted. */
+	if (notmuch_filenames_valid (db_subdirs) &&
+	    strcmp (notmuch_filenames_get (db_subdirs), name) == 0)
+	    notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    /* Now that we've walked the whole filesystem subdir list, subdirs
+     * left over in the database list have been deleted. */
+    while (notmuch_filenames_valid (db_subdirs)) {
+	char *absolute = talloc_asprintf (state->removed_directories,
+					  "%s/%s", path,
+					  notmuch_filenames_get (db_subdirs));
+
+	if (state->debug)
+	    printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n",
+		    absolute);
+
+	_filename_list_add (state->removed_directories, absolute);
+
+	notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    /* Pass 2: Scan for new and removed files. */
+    filter.entry_type = S_IFREG;
+    num_fs_files = scandirx (path, &fs_files, filter_fn,
+			     directory ? dirent_sort_strcmp_name : NULL,
+			     &filter);
+    if (num_fs_files == -1) {
+	fprintf (stderr, "Error opening directory %s: %s\n",
+		 path, strerror (errno));
+	/* We consider this a fatal error because, if a user moved a
+	 * message from another directory that we were able to scan
+	 * into this directory, skipping this directory will cause
+	 * that message to be lost. */
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    for (i = 0; i < num_fs_files && !interrupted; i++) {
+	const char *name = fs_files[i];
+
+	/* Check if we've walked past any names in db_files. If so,
+	 * these have been deleted. */
 	while (notmuch_filenames_valid (db_files) &&
-	       strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0)
-	{
+	       strcmp (notmuch_filenames_get (db_files), name) < 0) {
 	    char *absolute = talloc_asprintf (state->removed_files,
 					      "%s/%s", path,
 					      notmuch_filenames_get (db_files));
@@ -532,46 +593,16 @@ add_files (notmuch_database_t *notmuch,
 	    notmuch_filenames_move_to_next (db_files);
 	}
 
-	while (notmuch_filenames_valid (db_subdirs) &&
-	       strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0)
-	{
-	    const char *filename = notmuch_filenames_get (db_subdirs);
-
-	    if (strcmp (filename, entry->d_name) < 0)
-	    {
-		char *absolute = talloc_asprintf (state->removed_directories,
-						  "%s/%s", path, filename);
-		if (state->debug)
-		    printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n",
-			absolute);
-
-		_filename_list_add (state->removed_directories, absolute);
-	    }
-
-	    notmuch_filenames_move_to_next (db_subdirs);
-	}
-
-	/* Only add regular files (and symlinks to regular files). */
-	entry_type = dirent_type (path, entry);
-	if (entry_type == -1) {
-	    fprintf (stderr, "Error reading file %s/%s: %s\n",
-		     path, entry->d_name, strerror (errno));
-	    return NOTMUCH_STATUS_FILE_ERROR;
-	} else if (entry_type != S_IFREG) {
-	    continue;
-	}
-
 	/* Don't add a file that we've added before. */
 	if (notmuch_filenames_valid (db_files) &&
-	    strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0)
-	{
+	    strcmp (notmuch_filenames_get (db_files), name) == 0) {
 	    notmuch_filenames_move_to_next (db_files);
 	    continue;
 	}
 
 	/* We're now looking at a regular file that doesn't yet exist
 	 * in the database, so add it. */
-	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	next = talloc_asprintf (notmuch, "%s/%s", path, name);
 
 	state->processed_files++;
 
@@ -605,10 +636,9 @@ add_files (notmuch_database_t *notmuch,
     if (interrupted)
 	goto DONE;
 
-    /* Now that we've walked the whole filesystem list, anything left
-     * over in the database lists has been deleted. */
-    while (notmuch_filenames_valid (db_files))
-    {
+    /* Now that we've walked the whole filesystem file list, files
+     * left over in the database list have been deleted. */
+    while (notmuch_filenames_valid (db_files)) {
 	char *absolute = talloc_asprintf (state->removed_files,
 					  "%s/%s", path,
 					  notmuch_filenames_get (db_files));
@@ -621,21 +651,6 @@ add_files (notmuch_database_t *notmuch,
 	notmuch_filenames_move_to_next (db_files);
     }
 
-    while (notmuch_filenames_valid (db_subdirs))
-    {
-	char *absolute = talloc_asprintf (state->removed_directories,
-					  "%s/%s", path,
-					  notmuch_filenames_get (db_subdirs));
-
-	if (state->debug)
-	    printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n",
-		    absolute);
-
-	_filename_list_add (state->removed_directories, absolute);
-
-	notmuch_filenames_move_to_next (db_subdirs);
-    }
-
     /* If the directory's mtime is the same as the wall-clock time
      * when we stat'ed the directory, we skip updating the mtime in
      * the database because a message could be delivered later in this
@@ -647,11 +662,17 @@ add_files (notmuch_database_t *notmuch,
   DONE:
     if (next)
 	talloc_free (next);
-    if (fs_entries) {
-	for (i = 0; i < num_fs_entries; i++)
-	    free (fs_entries[i]);
+    if (fs_subdirs) {
+	for (i = 0; i < num_fs_subdirs; i++)
+	    free (fs_subdirs[i]);
+
+	free (fs_subdirs);
+    }
+    if (fs_files) {
+	for (i = 0; i < num_fs_files; i++)
+	    free (fs_files[i]);
 
-	free (fs_entries);
+	free (fs_files);
     }
     if (db_subdirs)
 	notmuch_filenames_destroy (db_subdirs);
-- 
2.1.4

  parent reply	other threads:[~2016-04-15 19:31 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-15 19:29 [RFC PATCH 0/5] cli: notmuch new scandir rework Jani Nikula
2016-04-15 19:29 ` [RFC PATCH 1/5] cli: remove leftover dir variable Jani Nikula
2016-05-02 11:23   ` David Bremner
2016-04-15 19:29 ` [RFC PATCH 2/5] cli: drop inode sort order on directories unknown to the database Jani Nikula
2016-04-15 19:29 ` [RFC PATCH 3/5] util: add a homebrew scandir implementation Jani Nikula
2016-04-15 19:29 ` Jani Nikula [this message]
2016-04-15 19:29 ` [RFC PATCH 5/5] cli: convert count_files to new scandir Jani Nikula

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6e67ca7c973419213f2b9d47711b24cf7cb35d7a.1460748142.git.jani@nikula.org \
    --to=jani@nikula.org \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).