From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 4571A6DE15D4 for ; Thu, 3 Sep 2015 12:40:36 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" X-Spam-Flag: NO X-Spam-Score: -0.425 X-Spam-Level: X-Spam-Status: No, score=-0.425 tagged_above=-999 required=5 tests=[AWL=0.295, RCVD_IN_DNSWL_LOW=-0.7, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 4NCdwu0Yw4pW for ; Thu, 3 Sep 2015 12:40:34 -0700 (PDT) Received: from mail-wi0-f171.google.com (mail-wi0-f171.google.com [209.85.212.171]) by arlo.cworth.org (Postfix) with ESMTPS id DA4D76DE15E7 for ; Thu, 3 Sep 2015 12:40:26 -0700 (PDT) Received: by wicge5 with SMTP id ge5so84705833wic.0 for ; Thu, 03 Sep 2015 12:40:25 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:in-reply-to:references; bh=NlXTuY6QGPqQJwbKZYJuxHESc8MPqbilrd/29Rp56KY=; b=ecazczo3DDgja9izKe6j5vSEjzimdT4lIChtohGtJzltidQ/N7gVdhdpsctKCvfn0L o8BjfRVm0lpEWr9UaBEAWvDLNIeThvxKQpedMj2kexADMKuz14RVKgA4eGGfrFIrp5R4 XkWTi2m6QMtb70WM61MxwpcrcAlCtBgU9CLg3C8t/OFEBMh3Gto6krijoMHafKTTZnNw dBQxQ1NqKurOCju0PPJTQq0+gv/iHbraoYAtGYttxqKy9wn3sjOsgIHVnxbT6E78Oxqw vXpYphUWecIZHWj3+sFoFfD51HIhMRk+AuX9Rz//qvnp9SsYxKG0wKQyThSiKXy9xVA2 dkhw== X-Gm-Message-State: ALoCoQmLKjgeYYFJb4z06SoBNJroKC0WFBfEf1bmpF0SZFar09Xx49ipH/WxwFH5thPRF0fQ2Ort X-Received: by 10.194.82.167 with SMTP id j7mr54475903wjy.123.1441309225376; Thu, 03 Sep 2015 12:40:25 -0700 (PDT) Received: from localhost (mobile-access-bcee4f-131.dhcp.inet.fi. [188.238.79.131]) by smtp.gmail.com with ESMTPSA id i7sm625501wib.15.2015.09.03.12.40.24 (version=TLSv1/SSLv3 cipher=OTHER); Thu, 03 Sep 2015 12:40:24 -0700 (PDT) From: Jani Nikula To: notmuch@notmuchmail.org Subject: [PATCH v2 6/9] cli: change the data structure for notmuch address deduplication Date: Thu, 3 Sep 2015 22:40:02 +0300 Message-Id: X-Mailer: git-send-email 2.1.4 In-Reply-To: References: In-Reply-To: References: X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 03 Sep 2015 19:40:36 -0000 Currently we key the address hash table with the case sensitive "name
". Switch to case insensitive keying with just address, and store the case sensitive name and address in linked lists. This will be helpful in adding support for different deduplication schemes in the future. There will be a slight performance penalty for the current full case sensitive name + address deduplication, but this is simpler as a whole when other deduplication schemes are added, and I expect the schemes to be added to become more popular than the current default. Aparet from the possible performance penalty, the only user visible change should be the change in the output ordering for --output=count. The order is not guaranteed (and is based on hash table traversal) currently anyway, so this should be of no consequence. --- notmuch-client.h | 1 + notmuch-search.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 882aa30563df..97d68d1158ac 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -48,6 +48,7 @@ typedef GMimeCryptoContext notmuch_crypto_context_t; #include #include #include +#include #include "talloc-extra.h" diff --git a/notmuch-search.c b/notmuch-search.c index 66404b561679..7c51d5df6bd4 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -265,30 +265,74 @@ static mailbox_t *new_mailbox (void *ctx, const char *name, const char *addr) return mailbox; } +static int mailbox_compare (const void *v1, const void *v2) +{ + const mailbox_t *m1 = v1, *m2 = v2; + int v; + + if (m1->name && m2->name) + v = strcmp (m1->name, m2->name); + else + v = !!m1->name - !!m2->name; + + if (! v) + v = strcmp (m1->addr, m2->addr); + + return v; +} + /* Returns TRUE iff name and addr is duplicate. If not, stores the * name/addr pair in order to detect subsequent duplicates. */ static notmuch_bool_t is_duplicate (const search_context_t *ctx, const char *name, const char *addr) { char *key; + GList *list, *l; mailbox_t *mailbox; - key = talloc_asprintf (ctx->format, "%s <%s>", name, addr); - if (! key) - return FALSE; + list = g_hash_table_lookup (ctx->addresses, addr); + if (list) { + mailbox_t find = { + .name = name, + .addr = addr, + }; + + l = g_list_find_custom (list, &find, mailbox_compare); + if (l) { + mailbox = l->data; + mailbox->count++; + return TRUE; + } - mailbox = g_hash_table_lookup (ctx->addresses, key); - if (mailbox) { - mailbox->count++; - talloc_free (key); - return TRUE; + mailbox = new_mailbox (ctx->format, name, addr); + if (! mailbox) + return FALSE; + + /* + * XXX: It would be more efficient to prepend to the list, but + * then we'd have to store the changed list head back to the + * hash table. This check is here just to avoid the compiler + * warning for unused result. + */ + if (list != g_list_append (list, mailbox)) + INTERNAL_ERROR ("appending to list changed list head\n"); + + return FALSE; } + key = talloc_strdup (ctx->format, addr); + if (! key) + return FALSE; + mailbox = new_mailbox (ctx->format, name, addr); if (! mailbox) return FALSE; - g_hash_table_insert (ctx->addresses, key, mailbox); + list = g_list_append (NULL, mailbox); + if (! list) + return FALSE; + + g_hash_table_insert (ctx->addresses, key, list); return FALSE; } @@ -401,12 +445,21 @@ _talloc_free_for_g_hash (void *ptr) } static void -print_hash_value (unused (gpointer key), gpointer value, gpointer user_data) +_list_free_for_g_hash (void *ptr) +{ + g_list_free_full (ptr, _talloc_free_for_g_hash); +} + +static void +print_list_value (void *mailbox, void *context) { - const mailbox_t *mailbox = value; - search_context_t *ctx = user_data; + print_mailbox (context, mailbox); +} - print_mailbox (ctx, mailbox); +static void +print_hash_value (unused (void *key), void *list, void *context) +{ + g_list_foreach (list, print_list_value, context); } static int @@ -792,8 +845,9 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[]) argc - opt_index, argv + opt_index)) return EXIT_FAILURE; - ctx->addresses = g_hash_table_new_full (g_str_hash, g_str_equal, - _talloc_free_for_g_hash, _talloc_free_for_g_hash); + ctx->addresses = g_hash_table_new_full (strcase_hash, strcase_equal, + _talloc_free_for_g_hash, + _list_free_for_g_hash); ret = do_search_messages (ctx); -- 2.1.4