unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: Michal Sojka <sojkam1@fel.cvut.cz>
To: notmuch@notmuchmail.org
Subject: [PATCH v2 10/10] cli: address: Add --filter-by option to configure address filtering
Date: Tue,  4 Nov 2014 00:50:22 +0100	[thread overview]
Message-ID: <1415058622-21162-11-git-send-email-sojkam1@fel.cvut.cz> (raw)
In-Reply-To: <1415058622-21162-1-git-send-email-sojkam1@fel.cvut.cz>

This option configures the criterion used for duplicate address
filtering. Without this option, all unique combinations of name and
address parts are printed. With it, the output can be filtered
further, for example to contain only unique address parts.
---
 completion/notmuch-completion.bash |  6 +++-
 completion/notmuch-completion.zsh  |  1 +
 doc/man1/notmuch-address.rst       | 36 ++++++++++++++++++-
 notmuch-search.c                   | 48 +++++++++++++++++++++++--
 test/T097-address-filter-by.sh     | 73 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 160 insertions(+), 4 deletions(-)
 create mode 100755 test/T097-address-filter-by.sh

diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash
index db152f3..2cb1586 100644
--- a/completion/notmuch-completion.bash
+++ b/completion/notmuch-completion.bash
@@ -310,7 +310,7 @@ _notmuch_search()
     ! $split &&
     case "${cur}" in
 	-*)
-	    local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate="
+	    local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by="
 	    compopt -o nospace
 	    COMPREPLY=( $(compgen -W "$options" -- ${cur}) )
 	    ;;
@@ -343,6 +343,10 @@ _notmuch_address()
 	    COMPREPLY=( $( compgen -W "true false flag all" -- "${cur}" ) )
 	    return
 	    ;;
+	--filter-by)
+	    COMPREPLY=( $( compgen -W "nameaddr name addr addrfold nameaddrfold" -- "${cur}" ) )
+	    return
+	    ;;
     esac
 
     ! $split &&
diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh
index 8968562..3758f1a 100644
--- a/completion/notmuch-completion.zsh
+++ b/completion/notmuch-completion.zsh
@@ -62,6 +62,7 @@ _notmuch_address()
   _arguments -s : \
     '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \
     '--output=[select what to output]:output:((sender recipients count))'
+    '--filter-by=[filter out duplicate addresses]:filter-by:((nameaddr\:"both name and address part" name\:"name part" addr\:"address part" addrfold\:"case-insensitive address part" nameaddrfold\:"name and case-insensitive address part"))'
 }
 
 _notmuch()
diff --git a/doc/man1/notmuch-address.rst b/doc/man1/notmuch-address.rst
index 18473a7..524ab91 100644
--- a/doc/man1/notmuch-address.rst
+++ b/doc/man1/notmuch-address.rst
@@ -11,7 +11,8 @@ DESCRIPTION
 ===========
 
 Search for messages matching the given search terms, and display the
-addresses from them. Duplicate addresses are filtered out.
+addresses from them. Duplicate addresses are filtered out. Filtering
+can be configured with the --filter-by option.
 
 See **notmuch-search-terms(7)** for details of the supported syntax for
 <search-terms>.
@@ -85,6 +86,39 @@ Supported options for **address** include
         is the number of matching non-excluded messages in the thread,
         rather than the number of matching messages.
 
+    ``--filter-by=``\ (**nameaddr**\ \|\ **name** \|\ **addr**\ \|\ **addrfold**\ \|\ **nameaddrfold**\)
+
+	Controls how to filter out duplicate addresses. The filtering
+	algorithm receives a sequence of email addresses and outputs
+	the same sequence without the addresses that are considered a
+	duplicate of a previously output address. What is considered a
+	duplicate depends on how the two addresses are compared:
+
+	**nameaddr** means that both the name and address parts are
+	compared in a case-sensitive manner. Therefore, all identical
+	looking address strings are considered duplicates. This is the
+	default.
+
+	**name** means that only the name part is compared (in
+	case-sensitive manner). For example, the addresses "John Doe
+	<me@example.com>" and "John Doe <john@doe.name>" will be
+	considered duplicate.
+
+	**addr** means that only the address part is compared (in
+	case-sensitive manner). For example, the addresses "John Doe
+	<john@example.com>" and "Dr. John Doe <john@example.com>" will
+	be considered duplicate.
+
+	**addrfold** is like **addr**, but the comparison is done in a
+	case-insensitive manner. For example, the addresses "John Doe
+	<john@example.com>" and "Dr. John Doe <JOHN@EXAMPLE.COM>" will
+	be considered duplicate.
+
+	**nameaddrfold** is like **nameaddr**, but the address comparison
+	is done in a case-insensitive manner. For example, the
+	addresses "John Doe <john@example.com>" and "John Doe
+	<JOHN@EXAMPLE.COM>" will be considered duplicate.
+
 EXIT STATUS
 ===========
 
diff --git a/notmuch-search.c b/notmuch-search.c
index d99e530..04e33c6 100644
--- a/notmuch-search.c
+++ b/notmuch-search.c
@@ -43,6 +43,14 @@ typedef enum {
     NOTMUCH_FORMAT_SEXP
 } format_sel_t;
 
+typedef enum {
+    FILTER_BY_NAMEADDR = 0,
+    FILTER_BY_NAME,
+    FILTER_BY_ADDR,
+    FILTER_BY_ADDRFOLD,
+    FILTER_BY_NAMEADDRFOLD,
+} filter_by_t;
+
 typedef struct {
     notmuch_database_t *notmuch;
     format_sel_t format_sel;
@@ -55,6 +63,7 @@ typedef struct {
     int limit;
     int dupe;
     GHashTable *addresses;
+    filter_by_t filter_by;
 } search_context_t;
 
 typedef struct {
@@ -243,15 +252,42 @@ do_search_threads (search_context_t *ctx)
     return 0;
 }
 
-/* Returns TRUE iff name and addr is duplicate. */
+/* Returns TRUE iff name and/or addr is considered duplicate. */
 static notmuch_bool_t
 is_duplicate (const search_context_t *ctx, const char *name, const char *addr)
 {
     notmuch_bool_t duplicate;
     char *key;
+    gchar *addrfold = NULL;
     mailbox_t *mailbox;
 
-    key = talloc_asprintf (ctx->format, "%s <%s>", name, addr);
+    if (ctx->filter_by == FILTER_BY_ADDRFOLD ||
+	ctx->filter_by == FILTER_BY_NAMEADDRFOLD)
+	addrfold = g_utf8_casefold (addr, -1);
+
+    switch (ctx->filter_by) {
+    case FILTER_BY_NAMEADDR:
+	key = talloc_asprintf (ctx->format, "%s <%s>", name, addr);
+	break;
+    case FILTER_BY_NAMEADDRFOLD:
+	key = talloc_asprintf (ctx->format, "%s <%s>", name, addrfold);
+	break;
+    case FILTER_BY_NAME:
+	key = talloc_strdup (ctx->format, name); /* !name results in !key */
+	break;
+    case FILTER_BY_ADDR:
+	key = talloc_strdup (ctx->format, addr);
+	break;
+    case FILTER_BY_ADDRFOLD:
+	key = talloc_strdup (ctx->format, addrfold);
+	break;
+    default:
+	INTERNAL_ERROR("invalid --filter-by flags");
+    }
+
+    if (addrfold)
+	g_free (addrfold);
+
     if (! key)
 	return FALSE;
 
@@ -721,10 +757,18 @@ notmuch_address_command (notmuch_config_t *config, int argc, char *argv[])
 				  { "recipients", OUTPUT_RECIPIENTS },
 				  { "count", OUTPUT_COUNT },
 				  { 0, 0 } } },
+	{ NOTMUCH_OPT_KEYWORD, &ctx->filter_by, "filter-by", 'b',
+	  (notmuch_keyword_t []){ { "nameaddr", FILTER_BY_NAMEADDR },
+				  { "name", FILTER_BY_NAME },
+				  { "addr", FILTER_BY_ADDR },
+				  { "addrfold", FILTER_BY_ADDRFOLD },
+				  { "nameaddrfold", FILTER_BY_NAMEADDRFOLD },
+				  { 0, 0 } } },
 	{ NOTMUCH_OPT_INHERIT, &common_options, NULL, 0, 0 },
 	{ 0, 0, 0, 0, 0 }
     };
 
+    ctx->filter_by = FILTER_BY_NAMEADDR,
     opt_index = parse_arguments (argc, argv, options, 1);
     if (opt_index < 0)
 	return EXIT_FAILURE;
diff --git a/test/T097-address-filter-by.sh b/test/T097-address-filter-by.sh
new file mode 100755
index 0000000..544d8e8
--- /dev/null
+++ b/test/T097-address-filter-by.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+test_description='duplicite address filtering in "notmuch address"'
+. ./test-lib.sh
+
+add_message '[to]="John Doe <foo@example.com>, John Doe <bar@example.com>"'
+add_message '[to]="\"Doe, John\" <foo@example.com>"' '[cc]="John Doe <Bar@Example.COM>"'
+add_message '[to]="\"Doe, John\" <foo@example.com>"' '[bcc]="John Doe <Bar@Example.COM>"'
+
+test_begin_subtest "--output=recipients"
+notmuch address --output=recipients "*" >OUTPUT
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+John Doe <bar@example.com>
+"Doe, John" <foo@example.com>
+John Doe <Bar@Example.COM>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=nameaddr"
+notmuch address --output=recipients --filter-by=nameaddr "*" >OUTPUT
+# The same as above
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+John Doe <bar@example.com>
+"Doe, John" <foo@example.com>
+John Doe <Bar@Example.COM>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=name"
+notmuch address --output=recipients --filter-by=name "*" >OUTPUT
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+"Doe, John" <foo@example.com>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=addr"
+notmuch address --output=recipients --filter-by=addr "*" >OUTPUT
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+John Doe <bar@example.com>
+John Doe <Bar@Example.COM>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=addrfold"
+notmuch address --output=recipients --filter-by=addrfold "*" >OUTPUT
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+John Doe <bar@example.com>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=nameaddrfold"
+notmuch address --output=recipients --filter-by=nameaddrfold "*" >OUTPUT
+cat <<EOF >EXPECTED
+John Doe <foo@example.com>
+John Doe <bar@example.com>
+"Doe, John" <foo@example.com>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "--output=recipients --filter-by=nameaddrfold --output=count"
+notmuch address --output=recipients --filter-by=nameaddrfold --output=count "*" | sort -n >OUTPUT
+cat <<EOF >EXPECTED
+1	John Doe <foo@example.com>
+2	"Doe, John" <foo@example.com>
+3	John Doe <bar@example.com>
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_done
-- 
2.1.1

  parent reply	other threads:[~2014-11-03 23:51 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-03 23:50 [PATCH v2 00/10] "notmuch address" command Michal Sojka
2014-11-03 23:50 ` [PATCH v2 01/10] cli: search: Rename options to context Michal Sojka
2014-11-04  6:24   ` David Bremner
2014-11-03 23:50 ` [PATCH v2 02/10] cli: search: Move more variables into search_context_t Michal Sojka
2014-11-03 23:50 ` [PATCH v2 03/10] cli: search: Convert ctx. to ctx-> Michal Sojka
2014-11-04  6:29   ` David Bremner
2014-11-03 23:50 ` [PATCH v2 04/10] cli: search: Split notmuch_search_command to smaller functions Michal Sojka
2014-11-03 23:50 ` [PATCH v2 05/10] cli: add support for hierarchical command line option arrays Michal Sojka
2014-11-04  6:36   ` David Bremner
2014-11-04  6:38     ` David Bremner
2014-11-03 23:50 ` [PATCH v2 06/10] cli: Introduce "notmuch address" command Michal Sojka
2014-11-04  6:52   ` David Bremner
2014-11-04  9:40     ` Tomi Ollila
2014-11-04 21:59     ` Michal Sojka
2014-11-04 22:12       ` David Bremner
2014-11-04  9:04   ` Mark Walters
2014-11-04 22:15     ` Michal Sojka
2014-11-05 11:22       ` Mark Walters
2014-11-05 12:23         ` Michal Sojka
2014-11-05 12:48           ` Mark Walters
2014-11-03 23:50 ` [PATCH v2 07/10] cli: search: Convert --output to keyword argument Michal Sojka
2014-11-04  8:58   ` Mark Walters
2014-11-04  9:08     ` Mark Walters
2014-11-04 11:26     ` Michal Sojka
2014-11-03 23:50 ` [PATCH v2 08/10] cli: address: Do not output duplicate addresses Michal Sojka
2014-11-04  7:05   ` David Bremner
2014-11-04 11:36     ` Michal Sojka
2014-11-03 23:50 ` [PATCH v2 09/10] cli: address: Add --output=count Michal Sojka
2014-11-04  9:11   ` Mark Walters
2014-11-03 23:50 ` Michal Sojka [this message]
2014-11-04  9:23 ` [PATCH v2 00/10] "notmuch address" command Mark Walters
2014-11-04 20:33 ` Tomi Ollila

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1415058622-21162-11-git-send-email-sojkam1@fel.cvut.cz \
    --to=sojkam1@fel.cvut.cz \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).