unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [PATCH v2 0/8] Improve charset and cid: handling
@ 2015-01-24 21:16 Austin Clements
  2015-01-24 21:16 ` [PATCH v2 1/8] emacs: Track full message and part descriptor in w3m CID store Austin Clements
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:16 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

This is v2 of id:1398105468-14317-1-git-send-email-amdragon@mit.edu.
This improves some comments/documentation, fixes a bug that caused
cryptographic processing to not happen on HTML parts, and addresses
some byte compiler warnings on Emacs 23.  This version has also been
rebased against the several months of changes that happened on master
since v1 (which, remarkably, was trivial).

The diff from v1 is below.

diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
index 83cbf2f..f8e5165 100644
--- a/emacs/notmuch-lib.el
+++ b/emacs/notmuch-lib.el
@@ -535,7 +535,10 @@ (defun notmuch-get-bodypart-binary (msg part process-crypto &optional cache)
 This returns the \"raw\" content of the given part after content
 transfer decoding, but with no further processing (see the
 discussion of --format=raw in man notmuch-show).  In particular,
-this does no charset conversion."
+this does no charset conversion.
+
+If CACHE is non-nil, the content of this part will be saved in
+MSG (if it isn't already)."
   (let ((data (plist-get part :binary-content)))
     (when (not data)
       (let ((args `("show" "--format=raw"
@@ -558,6 +561,8 @@ (defun notmuch-get-bodypart-binary (msg part process-crypto &optional cache)
 	    (apply #'call-process notmuch-command nil '(t nil) nil args)
 	    (setq data (buffer-string)))))
       (when cache
+	;; Cheat.  part is non-nil, and `plist-put' always modifies
+	;; the list in place if it's non-nil.
 	(plist-put part :binary-content data)))
     data))
 
@@ -567,7 +572,10 @@ (defun notmuch-get-bodypart-text (msg part process-crypto &optional cache)
 This returns the content of the given part as a multibyte Lisp
 string after performing content transfer decoding and any
 necessary charset decoding.  It is an error to use this for
-non-text/* parts."
+non-text/* parts.
+
+If CACHE is non-nil, the content of this part will be saved in
+MSG (if it isn't already)."
   (let ((content (plist-get part :content)))
     (when (not content)
       ;; Use show --format=sexp to fetch decoded content
diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index d190711..66350d4 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -781,10 +781,14 @@ (defun notmuch-show-insert-part-text/html (msg part content-type nth depth butto
     (let ((mm-inline-text-html-with-w3m-keymap nil))
       (notmuch-show-insert-part-*/* msg part content-type nth depth button))))
 
+;; These functions are used by notmuch-show--insert-part-text/html-shr
+(declare-function libxml-parse-html-region "xml.c")
+(declare-function shr-insert-document "shr")
+
 (defun notmuch-show--insert-part-text/html-shr (msg part)
   ;; Make sure shr is loaded before we start let-binding its globals
   (require 'shr)
-  (let ((dom (let (process-crypto notmuch-show-process-crypto)
+  (let ((dom (let ((process-crypto notmuch-show-process-crypto))
 	       (with-temp-buffer
 		 (insert (notmuch-get-bodypart-text msg part process-crypto))
 		 (libxml-parse-html-region (point-min) (point-max)))))

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 1/8] emacs: Track full message and part descriptor in w3m CID store
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
@ 2015-01-24 21:16 ` Austin Clements
  2015-01-24 21:16 ` [PATCH v2 2/8] emacs: Create an API for fetching parts as undecoded binary Austin Clements
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:16 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

This will simplify later changes.
---
 emacs/notmuch-show.el | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index 87b4881..df2389e 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -562,35 +562,26 @@ (defvar w3m-current-buffer) ;; From `w3m.el'.
 (defvar notmuch-show-w3m-cid-store nil)
 (make-variable-buffer-local 'notmuch-show-w3m-cid-store)
 
-(defun notmuch-show-w3m-cid-store-internal (content-id
-					    message-id
-					    part-number
-					    content-type
-					    content)
-  (push (list content-id
-	      message-id
-	      part-number
-	      content-type
-	      content)
+(defun notmuch-show-w3m-cid-store-internal (content-id msg part content)
+  (push (list content-id msg part content)
 	notmuch-show-w3m-cid-store))
 
 (defun notmuch-show-w3m-cid-store (msg part)
   (let ((content-id (plist-get part :content-id)))
     (when content-id
       (notmuch-show-w3m-cid-store-internal (concat "cid:" content-id)
-					   (plist-get msg :id)
-					   (plist-get part :id)
-					   (plist-get part :content-type)
-					   nil))))
+					   msg part nil))))
 
 (defun notmuch-show-w3m-cid-retrieve (url &rest args)
   (let ((matching-part (with-current-buffer w3m-current-buffer
 			 (assoc url notmuch-show-w3m-cid-store))))
     (if matching-part
-	(let ((message-id (nth 1 matching-part))
-	      (part-number (nth 2 matching-part))
-	      (content-type (nth 3 matching-part))
-	      (content (nth 4 matching-part)))
+	(let* ((msg (nth 1 matching-part))
+	       (part (nth 2 matching-part))
+	       (content (nth 3 matching-part))
+	       (message-id (plist-get msg :id))
+	       (part-number (plist-get part :id))
+	       (content-type (plist-get part :content-type)))
 	  ;; If we don't already have the content, get it and cache
 	  ;; it, as some messages reference the same cid: part many
 	  ;; times (hundreds!), which results in many calls to
@@ -599,11 +590,7 @@ (defun notmuch-show-w3m-cid-retrieve (url &rest args)
 	    (setq content (notmuch-get-bodypart-internal (notmuch-id-to-query message-id)
 							      part-number notmuch-show-process-crypto))
 	    (with-current-buffer w3m-current-buffer
-	      (notmuch-show-w3m-cid-store-internal url
-						   message-id
-						   part-number
-						   content-type
-						   content)))
+	      (notmuch-show-w3m-cid-store-internal url msg part content)))
 	  (insert content)
 	  content-type)
       nil)))
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 2/8] emacs: Create an API for fetching parts as undecoded binary
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
  2015-01-24 21:16 ` [PATCH v2 1/8] emacs: Track full message and part descriptor in w3m CID store Austin Clements
@ 2015-01-24 21:16 ` Austin Clements
  2015-01-24 21:16 ` [PATCH v2 3/8] emacs: Remove broken `notmuch-get-bodypart-content' API Austin Clements
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:16 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

The new function, `notmuch-get-bodypart-binary', replaces
`notmuch-get-bodypart-internal'.  Whereas the old function was really
meant for internal use in `notmuch-get-bodypart-content', it was used
in a few other places.  Since the difference between
`notmuch-get-bodypart-content' and `notmuch-get-bodypart-internal' was
unclear, these other uses were always confusing and potentially
inconsistent.  The new call clearly requests the part as undecoded
binary.

This is step 1 of 2 in separating `notmuch-get-bodypart-content' into
two APIs for retrieving either undecoded binary or decoded text.
---
 emacs/notmuch-lib.el  | 28 ++++++++++++++--------------
 emacs/notmuch-show.el | 22 +++++++++-------------
 2 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
index fd25f7c..d4b6684 100644
--- a/emacs/notmuch-lib.el
+++ b/emacs/notmuch-lib.el
@@ -529,25 +529,25 @@ (defun notmuch-parts-filter-by-type (parts type)
    (lambda (part) (notmuch-match-content-type (plist-get part :content-type) type))
    parts))
 
-;; Helper for parts which are generally not included in the default
-;; SEXP output.
-(defun notmuch-get-bodypart-internal (query part-number process-crypto)
-  (let ((args '("show" "--format=raw"))
-	(part-arg (format "--part=%s" part-number)))
-    (setq args (append args (list part-arg)))
-    (if process-crypto
-	(setq args (append args '("--decrypt"))))
-    (setq args (append args (list query)))
+(defun notmuch-get-bodypart-binary (msg part process-crypto)
+  "Return the unprocessed content of PART in MSG.
+
+This returns the \"raw\" content of the given part after content
+transfer decoding, but with no further processing (see the
+discussion of --format=raw in man notmuch-show).  In particular,
+this does no charset conversion."
+  (let ((args `("show" "--format=raw"
+		,(format "--part=%d" (plist-get part :id))
+		,@(when process-crypto '("--decrypt"))
+		,(notmuch-id-to-query (plist-get msg :id)))))
     (with-temp-buffer
       (let ((coding-system-for-read 'no-conversion))
-	(progn
-	  (apply 'call-process (append (list notmuch-command nil (list t nil) nil) args))
-	  (buffer-string))))))
+	(apply #'call-process notmuch-command nil '(t nil) nil args)
+	(buffer-string)))))
 
 (defun notmuch-get-bodypart-content (msg part process-crypto)
   (or (plist-get part :content)
-      (notmuch-get-bodypart-internal (notmuch-id-to-query (plist-get msg :id))
-				     (plist-get part :id) process-crypto)))
+      (notmuch-get-bodypart-binary msg part process-crypto)))
 
 ;; Workaround: The call to `mm-display-part' below triggers a bug in
 ;; Emacs 24 if it attempts to use the shr renderer to display an HTML
diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index df2389e..b3e339e 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -579,16 +579,14 @@ (defun notmuch-show-w3m-cid-retrieve (url &rest args)
 	(let* ((msg (nth 1 matching-part))
 	       (part (nth 2 matching-part))
 	       (content (nth 3 matching-part))
-	       (message-id (plist-get msg :id))
-	       (part-number (plist-get part :id))
 	       (content-type (plist-get part :content-type)))
 	  ;; If we don't already have the content, get it and cache
 	  ;; it, as some messages reference the same cid: part many
 	  ;; times (hundreds!), which results in many calls to
 	  ;; `notmuch part'.
 	  (unless content
-	    (setq content (notmuch-get-bodypart-internal (notmuch-id-to-query message-id)
-							      part-number notmuch-show-process-crypto))
+	    (setq content (notmuch-get-bodypart-binary
+			   msg part notmuch-show-process-crypto))
 	    (with-current-buffer w3m-current-buffer
 	      (notmuch-show-w3m-cid-store-internal url msg part content)))
 	  (insert content)
@@ -2162,15 +2160,14 @@ (defun notmuch-show-stash-git-send-email (&optional no-in-reply-to)
 
 ;; Interactive part functions and their helpers
 
-(defun notmuch-show-generate-part-buffer (message-id nth)
+(defun notmuch-show-generate-part-buffer (msg part)
   "Return a temporary buffer containing the specified part's content."
   (let ((buf (generate-new-buffer " *notmuch-part*"))
 	(process-crypto notmuch-show-process-crypto))
     (with-current-buffer buf
-      (setq notmuch-show-process-crypto process-crypto)
-      ;; Always acquires the part via `notmuch part', even if it is
-      ;; available in the SEXP output.
-      (insert (notmuch-get-bodypart-internal message-id nth notmuch-show-process-crypto)))
+      ;; This is always used in the content of mm handles, which
+      ;; expect undecoded, binary part content.
+      (insert (notmuch-get-bodypart-binary msg part process-crypto)))
     buf))
 
 (defun notmuch-show-current-part-handle ()
@@ -2178,10 +2175,9 @@ (defun notmuch-show-current-part-handle ()
 
 This creates a temporary buffer for the part's content; the
 caller is responsible for killing this buffer as appropriate."
-  (let* ((part (notmuch-show-get-part-properties))
-	 (message-id (notmuch-show-get-message-id))
-	 (nth (plist-get part :id))
-	 (buf (notmuch-show-generate-part-buffer message-id nth))
+  (let* ((msg (notmuch-show-get-message-properties))
+	 (part (notmuch-show-get-part-properties))
+	 (buf (notmuch-show-generate-part-buffer msg part))
 	 (computed-type (plist-get part :computed-type))
 	 (filename (plist-get part :filename))
 	 (disposition (if filename `(attachment (filename . ,filename)))))
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 3/8] emacs: Remove broken `notmuch-get-bodypart-content' API
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
  2015-01-24 21:16 ` [PATCH v2 1/8] emacs: Track full message and part descriptor in w3m CID store Austin Clements
  2015-01-24 21:16 ` [PATCH v2 2/8] emacs: Create an API for fetching parts as undecoded binary Austin Clements
@ 2015-01-24 21:16 ` Austin Clements
  2015-01-24 21:16 ` [PATCH v2 4/8] emacs: Return unibyte strings for binary part data Austin Clements
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:16 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

`notmuch-get-bodypart-content' could do two very different things,
depending on conditions: for text/* parts other than text/html, it
would return the part content as a multibyte Lisp string *after*
charset conversion, while for other parts (including text/html), it
would return binary part content without charset conversion.

This commit completes the split of `notmuch-get-bodypart-content' into
two different and explicit APIs: `notmuch-get-bodypart-binary' and
`notmuch-get-bodypart-text'.  It updates all callers to use one or the
other depending on what's appropriate.
---
 emacs/notmuch-lib.el  | 37 ++++++++++++++++++++++++++++---------
 emacs/notmuch-show.el |  8 ++++----
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
index d4b6684..a0a95d8 100644
--- a/emacs/notmuch-lib.el
+++ b/emacs/notmuch-lib.el
@@ -545,9 +545,25 @@ (defun notmuch-get-bodypart-binary (msg part process-crypto)
 	(apply #'call-process notmuch-command nil '(t nil) nil args)
 	(buffer-string)))))
 
-(defun notmuch-get-bodypart-content (msg part process-crypto)
-  (or (plist-get part :content)
-      (notmuch-get-bodypart-binary msg part process-crypto)))
+(defun notmuch-get-bodypart-text (msg part process-crypto)
+  "Return the text content of PART in MSG.
+
+This returns the content of the given part as a multibyte Lisp
+string after performing content transfer decoding and any
+necessary charset decoding.  It is an error to use this for
+non-text/* parts."
+  (let ((content (plist-get part :content)))
+    (when (not content)
+      ;; Use show --format=sexp to fetch decoded content
+      (let* ((args `("show" "--format=sexp" "--include-html"
+		     ,(format "--part=%s" (plist-get part :id))
+		     ,@(when process-crypto '("--decrypt"))
+		     ,(notmuch-id-to-query (plist-get msg :id))))
+	     (npart (apply #'notmuch-call-notmuch-sexp args)))
+	(setq content (plist-get npart :content))
+	(when (not content)
+	  (error "Internal error: No :content from %S" args))))
+    content))
 
 ;; Workaround: The call to `mm-display-part' below triggers a bug in
 ;; Emacs 24 if it attempts to use the shr renderer to display an HTML
@@ -568,18 +584,21 @@ (defun notmuch-mm-display-part-inline (msg part content-type process-crypto)
 current buffer, if possible."
   (let ((display-buffer (current-buffer)))
     (with-temp-buffer
-      ;; In case there is :content, the content string is already converted
-      ;; into emacs internal format. `gnus-decoded' is a fake charset,
-      ;; which means no further decoding (to be done by mm- functions).
-      (let* ((charset (if (plist-member part :content)
-			  'gnus-decoded
+      ;; In case we already have :content, use it and tell mm-* that
+      ;; it's already been charset-decoded by using the fake
+      ;; `gnus-decoded' charset.  Otherwise, we'll fetch the binary
+      ;; part content and let mm-* decode it.
+      (let* ((have-content (plist-member part :content))
+	     (charset (if have-content 'gnus-decoded
 			(plist-get part :content-charset)))
 	     (handle (mm-make-handle (current-buffer) `(,content-type (charset . ,charset)))))
 	;; If the user wants the part inlined, insert the content and
 	;; test whether we are able to inline it (which includes both
 	;; capability and suitability tests).
 	(when (mm-inlined-p handle)
-	  (insert (notmuch-get-bodypart-content msg part process-crypto))
+	  (if have-content
+	      (insert (notmuch-get-bodypart-text msg part process-crypto))
+	    (insert (notmuch-get-bodypart-binary msg part process-crypto)))
 	  (when (mm-inlinable-p handle)
 	    (set-buffer display-buffer)
 	    (mm-display-part handle)
diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index b3e339e..f29428a 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -702,7 +702,7 @@ (defun notmuch-show-insert-part-text/plain (msg part content-type nth depth butt
   (let ((start (if button
 		   (button-start button)
 		 (point))))
-    (insert (notmuch-get-bodypart-content msg part notmuch-show-process-crypto))
+    (insert (notmuch-get-bodypart-text msg part notmuch-show-process-crypto))
     (save-excursion
       (save-restriction
 	(narrow-to-region start (point-max))
@@ -711,9 +711,9 @@ (defun notmuch-show-insert-part-text/plain (msg part content-type nth depth butt
 
 (defun notmuch-show-insert-part-text/calendar (msg part content-type nth depth button)
   (insert (with-temp-buffer
-	    (insert (notmuch-get-bodypart-content msg part notmuch-show-process-crypto))
-	    ;; notmuch-get-bodypart-content provides "raw", non-converted
-	    ;; data. Replace CRLF with LF before icalendar can use it.
+	    (insert (notmuch-get-bodypart-text msg part notmuch-show-process-crypto))
+	    ;; notmuch-get-bodypart-text does no newline conversion.
+	    ;; Replace CRLF with LF before icalendar can use it.
 	    (goto-char (point-min))
 	    (while (re-search-forward "\r\n" nil t)
 	      (replace-match "\n" nil nil))
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 4/8] emacs: Return unibyte strings for binary part data
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
                   ` (2 preceding siblings ...)
  2015-01-24 21:16 ` [PATCH v2 3/8] emacs: Remove broken `notmuch-get-bodypart-content' API Austin Clements
@ 2015-01-24 21:16 ` Austin Clements
  2015-01-24 21:17 ` [PATCH v2 5/8] emacs: Support caching in notmuch-get-bodypart-{binary, text} Austin Clements
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:16 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

Unibyte strings are meant for representing binary data.  In practice,
using unibyte versus multibyte strings affects *almost* nothing.  It
does happen to matter if we use the binary data in an image descriptor
(which is, helpfully, not documented anywhere and getting it wrong
results in opaque errors like "Not a PNG image: <giant binary spew
that is, in fact, a PNG image>").
---
 emacs/notmuch-lib.el | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
index a0a95d8..3154725 100644
--- a/emacs/notmuch-lib.el
+++ b/emacs/notmuch-lib.el
@@ -530,7 +530,7 @@ (defun notmuch-parts-filter-by-type (parts type)
    parts))
 
 (defun notmuch-get-bodypart-binary (msg part process-crypto)
-  "Return the unprocessed content of PART in MSG.
+  "Return the unprocessed content of PART in MSG as a unibyte string.
 
 This returns the \"raw\" content of the given part after content
 transfer decoding, but with no further processing (see the
@@ -541,6 +541,16 @@ (defun notmuch-get-bodypart-binary (msg part process-crypto)
 		,@(when process-crypto '("--decrypt"))
 		,(notmuch-id-to-query (plist-get msg :id)))))
     (with-temp-buffer
+      ;; Emacs internally uses a UTF-8-like multibyte string
+      ;; representation by default (regardless of the coding system,
+      ;; which only affects how it goes from outside data to this
+      ;; internal representation).  This *almost* never matters.
+      ;; Annoyingly, it does matter if we use this data in an image
+      ;; descriptor, since Emacs will use its internal data buffer
+      ;; directly and this multibyte representation corrupts binary
+      ;; image formats.  Since the caller is asking for binary data, a
+      ;; unibyte string is a more appropriate representation anyway.
+      (set-buffer-multibyte nil)
       (let ((coding-system-for-read 'no-conversion))
 	(apply #'call-process notmuch-command nil '(t nil) nil args)
 	(buffer-string)))))
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 5/8] emacs: Support caching in notmuch-get-bodypart-{binary, text}
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
                   ` (3 preceding siblings ...)
  2015-01-24 21:16 ` [PATCH v2 4/8] emacs: Return unibyte strings for binary part data Austin Clements
@ 2015-01-24 21:17 ` Austin Clements
  2015-01-24 21:17 ` [PATCH v2 6/8] emacs: Use generalized content caching in w3m CID code Austin Clements
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:17 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

(The actual code change here is small, but requires re-indenting
existing code.)
---
 emacs/notmuch-lib.el | 64 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el
index 3154725..f8e5165 100644
--- a/emacs/notmuch-lib.el
+++ b/emacs/notmuch-lib.el
@@ -529,39 +529,53 @@ (defun notmuch-parts-filter-by-type (parts type)
    (lambda (part) (notmuch-match-content-type (plist-get part :content-type) type))
    parts))
 
-(defun notmuch-get-bodypart-binary (msg part process-crypto)
+(defun notmuch-get-bodypart-binary (msg part process-crypto &optional cache)
   "Return the unprocessed content of PART in MSG as a unibyte string.
 
 This returns the \"raw\" content of the given part after content
 transfer decoding, but with no further processing (see the
 discussion of --format=raw in man notmuch-show).  In particular,
-this does no charset conversion."
-  (let ((args `("show" "--format=raw"
-		,(format "--part=%d" (plist-get part :id))
-		,@(when process-crypto '("--decrypt"))
-		,(notmuch-id-to-query (plist-get msg :id)))))
-    (with-temp-buffer
-      ;; Emacs internally uses a UTF-8-like multibyte string
-      ;; representation by default (regardless of the coding system,
-      ;; which only affects how it goes from outside data to this
-      ;; internal representation).  This *almost* never matters.
-      ;; Annoyingly, it does matter if we use this data in an image
-      ;; descriptor, since Emacs will use its internal data buffer
-      ;; directly and this multibyte representation corrupts binary
-      ;; image formats.  Since the caller is asking for binary data, a
-      ;; unibyte string is a more appropriate representation anyway.
-      (set-buffer-multibyte nil)
-      (let ((coding-system-for-read 'no-conversion))
-	(apply #'call-process notmuch-command nil '(t nil) nil args)
-	(buffer-string)))))
-
-(defun notmuch-get-bodypart-text (msg part process-crypto)
+this does no charset conversion.
+
+If CACHE is non-nil, the content of this part will be saved in
+MSG (if it isn't already)."
+  (let ((data (plist-get part :binary-content)))
+    (when (not data)
+      (let ((args `("show" "--format=raw"
+		    ,(format "--part=%d" (plist-get part :id))
+		    ,@(when process-crypto '("--decrypt"))
+		    ,(notmuch-id-to-query (plist-get msg :id)))))
+	(with-temp-buffer
+	  ;; Emacs internally uses a UTF-8-like multibyte string
+	  ;; representation by default (regardless of the coding
+	  ;; system, which only affects how it goes from outside data
+	  ;; to this internal representation).  This *almost* never
+	  ;; matters.  Annoyingly, it does matter if we use this data
+	  ;; in an image descriptor, since Emacs will use its internal
+	  ;; data buffer directly and this multibyte representation
+	  ;; corrupts binary image formats.  Since the caller is
+	  ;; asking for binary data, a unibyte string is a more
+	  ;; appropriate representation anyway.
+	  (set-buffer-multibyte nil)
+	  (let ((coding-system-for-read 'no-conversion))
+	    (apply #'call-process notmuch-command nil '(t nil) nil args)
+	    (setq data (buffer-string)))))
+      (when cache
+	;; Cheat.  part is non-nil, and `plist-put' always modifies
+	;; the list in place if it's non-nil.
+	(plist-put part :binary-content data)))
+    data))
+
+(defun notmuch-get-bodypart-text (msg part process-crypto &optional cache)
   "Return the text content of PART in MSG.
 
 This returns the content of the given part as a multibyte Lisp
 string after performing content transfer decoding and any
 necessary charset decoding.  It is an error to use this for
-non-text/* parts."
+non-text/* parts.
+
+If CACHE is non-nil, the content of this part will be saved in
+MSG (if it isn't already)."
   (let ((content (plist-get part :content)))
     (when (not content)
       ;; Use show --format=sexp to fetch decoded content
@@ -572,7 +586,9 @@ (defun notmuch-get-bodypart-text (msg part process-crypto)
 	     (npart (apply #'notmuch-call-notmuch-sexp args)))
 	(setq content (plist-get npart :content))
 	(when (not content)
-	  (error "Internal error: No :content from %S" args))))
+	  (error "Internal error: No :content from %S" args)))
+      (when cache
+	(plist-put part :content content)))
     content))
 
 ;; Workaround: The call to `mm-display-part' below triggers a bug in
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 6/8] emacs: Use generalized content caching in w3m CID code
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
                   ` (4 preceding siblings ...)
  2015-01-24 21:17 ` [PATCH v2 5/8] emacs: Support caching in notmuch-get-bodypart-{binary, text} Austin Clements
@ 2015-01-24 21:17 ` Austin Clements
  2015-01-24 21:17 ` [PATCH v2 7/8] emacs: Rewrite content ID handling Austin Clements
  2015-01-24 21:17 ` [PATCH v2 8/8] emacs: Support cid: references with shr renderer Austin Clements
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:17 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

Previously this did its own caching, but this is now supported by more
generally by `notmuch-get-bodypart-binary'.
---
 emacs/notmuch-show.el | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index f29428a..11eac5f 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -562,15 +562,14 @@ (defvar w3m-current-buffer) ;; From `w3m.el'.
 (defvar notmuch-show-w3m-cid-store nil)
 (make-variable-buffer-local 'notmuch-show-w3m-cid-store)
 
-(defun notmuch-show-w3m-cid-store-internal (content-id msg part content)
-  (push (list content-id msg part content)
-	notmuch-show-w3m-cid-store))
+(defun notmuch-show-w3m-cid-store-internal (content-id msg part)
+  (push (list content-id msg part) notmuch-show-w3m-cid-store))
 
 (defun notmuch-show-w3m-cid-store (msg part)
   (let ((content-id (plist-get part :content-id)))
     (when content-id
       (notmuch-show-w3m-cid-store-internal (concat "cid:" content-id)
-					   msg part nil))))
+					   msg part))))
 
 (defun notmuch-show-w3m-cid-retrieve (url &rest args)
   (let ((matching-part (with-current-buffer w3m-current-buffer
@@ -578,18 +577,12 @@ (defun notmuch-show-w3m-cid-retrieve (url &rest args)
     (if matching-part
 	(let* ((msg (nth 1 matching-part))
 	       (part (nth 2 matching-part))
-	       (content (nth 3 matching-part))
 	       (content-type (plist-get part :content-type)))
-	  ;; If we don't already have the content, get it and cache
-	  ;; it, as some messages reference the same cid: part many
-	  ;; times (hundreds!), which results in many calls to
-	  ;; `notmuch part'.
-	  (unless content
-	    (setq content (notmuch-get-bodypart-binary
-			   msg part notmuch-show-process-crypto))
-	    (with-current-buffer w3m-current-buffer
-	      (notmuch-show-w3m-cid-store-internal url msg part content)))
-	  (insert content)
+	  ;; Request content caching, as some messages reference the
+	  ;; same cid: part many times (hundreds!), which results in
+	  ;; many calls to `notmuch show'.
+	  (insert (notmuch-get-bodypart-binary
+		   msg part notmuch-show-process-crypto 'cache))
 	  content-type)
       nil)))
 
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 7/8] emacs: Rewrite content ID handling
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
                   ` (5 preceding siblings ...)
  2015-01-24 21:17 ` [PATCH v2 6/8] emacs: Use generalized content caching in w3m CID code Austin Clements
@ 2015-01-24 21:17 ` Austin Clements
  2015-01-24 21:17 ` [PATCH v2 8/8] emacs: Support cid: references with shr renderer Austin Clements
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:17 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

Besides generally cleaning up the code and separating the general
content ID handling from the w3m-specific code, this fixes several
problems.

Foremost is that, previously, the code roughly assumed that referenced
parts would be in the same multipart/related as the reference.
According to RFC 2392, nothing could be further from the truth:
content IDs are supposed to be globally unique and globally
addressable.  This is nonsense, but this patch at least fixes things
so content IDs can be anywhere in the same message.

As a side-effect of the above, this handles multipart/alternate
content-IDs more in line with RFC 2046 section 5.1.2 (not that I've
ever seen this in the wild).  This also properly URL-decodes cid:
URLs, as per RFC 2392 (the previous code did not), and applies crypto
settings from the show buffer (the previous code used the global
crypto settings).
---
 emacs/notmuch-show.el | 120 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 74 insertions(+), 46 deletions(-)

diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index 11eac5f..34dcedd 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -525,6 +525,73 @@ (defun notmuch-show-toggle-part-invisibility (&optional button)
 	  (overlay-put overlay 'invisible (not show))
 	  t)))))
 
+;; Part content ID handling
+
+(defvar notmuch-show--cids nil
+  "Alist from raw content ID to (MSG PART).")
+(make-variable-buffer-local 'notmuch-show--cids)
+
+(defun notmuch-show--register-cids (msg part)
+  "Register content-IDs in PART and all of PART's sub-parts."
+  (let ((content-id (plist-get part :content-id)))
+    (when content-id
+      ;; Note that content-IDs are globally unique, except when they
+      ;; aren't: RFC 2046 section 5.1.4 permits children of a
+      ;; multipart/alternative to have the same content-ID, in which
+      ;; case the MUA is supposed to pick the best one it can render.
+      ;; We simply add the content-ID to the beginning of our alist;
+      ;; so if this happens, we'll take the last (and "best")
+      ;; alternative (even if we can't render it).
+      (push (list content-id msg part) notmuch-show--cids)))
+  ;; Recurse on sub-parts
+  (let ((ctype (notmuch-split-content-type
+		(downcase (plist-get part :content-type)))))
+    (cond ((equal (first ctype) "multipart")
+	   (mapc (apply-partially #'notmuch-show--register-cids msg)
+		 (plist-get part :content)))
+	  ((equal ctype '("message" "rfc822"))
+	   (notmuch-show--register-cids
+	    msg
+	    (first (plist-get (first (plist-get part :content)) :body)))))))
+
+(defun notmuch-show--get-cid-content (cid)
+  "Return a list (CID-content content-type) or nil.
+
+This will only find parts from messages that have been inserted
+into the current buffer.  CID must be a raw content ID, without
+enclosing angle brackets, a cid: prefix, or URL encoding.  This
+will return nil if the CID is unknown or cannot be retrieved."
+  (let ((descriptor (cdr (assoc cid notmuch-show--cids))))
+    (when descriptor
+      (let* ((msg (first descriptor))
+	     (part (second descriptor))
+	     ;; Request caching for this content, as some messages
+	     ;; reference the same cid: part many times (hundreds!).
+	     (content (notmuch-get-bodypart-binary
+		       msg part notmuch-show-process-crypto 'cache))
+	     (content-type (plist-get part :content-type)))
+	(list content content-type)))))
+
+(defun notmuch-show-setup-w3m ()
+  "Instruct w3m how to retrieve content from a \"related\" part of a message."
+  (interactive)
+  (if (boundp 'w3m-cid-retrieve-function-alist)
+    (unless (assq 'notmuch-show-mode w3m-cid-retrieve-function-alist)
+      (push (cons 'notmuch-show-mode #'notmuch-show--cid-w3m-retrieve)
+	    w3m-cid-retrieve-function-alist)))
+  (setq mm-inline-text-html-with-images t))
+
+(defvar w3m-current-buffer) ;; From `w3m.el'.
+(defun notmuch-show--cid-w3m-retrieve (url &rest args)
+  ;; url includes the cid: prefix and is URL encoded (see RFC 2392).
+  (let* ((cid (url-unhex-string (substring url 4)))
+	 (content-and-type
+	  (with-current-buffer w3m-current-buffer
+	    (notmuch-show--get-cid-content cid))))
+    (when content-and-type
+      (insert (first content-and-type))
+      (second content-and-type))))
+
 ;; MIME part renderers
 
 (defun notmuch-show-multipart/*-to-list (part)
@@ -549,56 +616,11 @@ (defun notmuch-show-insert-part-multipart/alternative (msg part content-type nth
       (indent-rigidly start (point) 1)))
   t)
 
-(defun notmuch-show-setup-w3m ()
-  "Instruct w3m how to retrieve content from a \"related\" part of a message."
-  (interactive)
-  (if (boundp 'w3m-cid-retrieve-function-alist)
-    (unless (assq 'notmuch-show-mode w3m-cid-retrieve-function-alist)
-      (push (cons 'notmuch-show-mode 'notmuch-show-w3m-cid-retrieve)
-	    w3m-cid-retrieve-function-alist)))
-  (setq mm-inline-text-html-with-images t))
-
-(defvar w3m-current-buffer) ;; From `w3m.el'.
-(defvar notmuch-show-w3m-cid-store nil)
-(make-variable-buffer-local 'notmuch-show-w3m-cid-store)
-
-(defun notmuch-show-w3m-cid-store-internal (content-id msg part)
-  (push (list content-id msg part) notmuch-show-w3m-cid-store))
-
-(defun notmuch-show-w3m-cid-store (msg part)
-  (let ((content-id (plist-get part :content-id)))
-    (when content-id
-      (notmuch-show-w3m-cid-store-internal (concat "cid:" content-id)
-					   msg part))))
-
-(defun notmuch-show-w3m-cid-retrieve (url &rest args)
-  (let ((matching-part (with-current-buffer w3m-current-buffer
-			 (assoc url notmuch-show-w3m-cid-store))))
-    (if matching-part
-	(let* ((msg (nth 1 matching-part))
-	       (part (nth 2 matching-part))
-	       (content-type (plist-get part :content-type)))
-	  ;; Request content caching, as some messages reference the
-	  ;; same cid: part many times (hundreds!), which results in
-	  ;; many calls to `notmuch show'.
-	  (insert (notmuch-get-bodypart-binary
-		   msg part notmuch-show-process-crypto 'cache))
-	  content-type)
-      nil)))
-
 (defun notmuch-show-insert-part-multipart/related (msg part content-type nth depth button)
   (let ((inner-parts (plist-get part :content))
 	(start (point)))
 
-    ;; We assume that the first part is text/html and the remainder
-    ;; things that it references.
-
-    ;; Stash the non-primary parts.
-    (mapc (lambda (part)
-	    (notmuch-show-w3m-cid-store msg part))
-	  (cdr inner-parts))
-
-    ;; Render the primary part.
+    ;; Render the primary part.  FIXME: Support RFC 2387 Start header.
     (notmuch-show-insert-bodypart msg (car inner-parts) depth)
     ;; Add hidden buttons for the rest
     (mapc (lambda (inner-part)
@@ -910,6 +932,12 @@ (defun notmuch-show-insert-bodypart (msg part depth &optional hide)
 
 (defun notmuch-show-insert-body (msg body depth)
   "Insert the body BODY at depth DEPTH in the current thread."
+
+  ;; Register all content IDs for this message.  According to RFC
+  ;; 2392, content IDs are *global*, but it's okay if an MUA treats
+  ;; them as only global within a message.
+  (notmuch-show--register-cids msg (first body))
+
   (mapc (lambda (part) (notmuch-show-insert-bodypart msg part depth)) body))
 
 (defun notmuch-show-make-symbol (type)
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 8/8] emacs: Support cid: references with shr renderer
  2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
                   ` (6 preceding siblings ...)
  2015-01-24 21:17 ` [PATCH v2 7/8] emacs: Rewrite content ID handling Austin Clements
@ 2015-01-24 21:17 ` Austin Clements
  7 siblings, 0 replies; 9+ messages in thread
From: Austin Clements @ 2015-01-24 21:17 UTC (permalink / raw)
  To: notmuch; +Cc: tomi.ollila

shr has really nice support for inline image rendering, but previously
we only had the hooks for w3m cid: references.
---
 emacs/notmuch-show.el | 45 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/emacs/notmuch-show.el b/emacs/notmuch-show.el
index 34dcedd..66350d4 100644
--- a/emacs/notmuch-show.el
+++ b/emacs/notmuch-show.el
@@ -767,14 +767,43 @@ (defun notmuch-show-get-mime-type-of-application/octet-stream (part)
 	  nil))))
 
 (defun notmuch-show-insert-part-text/html (msg part content-type nth depth button)
-  ;; text/html handler to work around bugs in renderers and our
-  ;; invisibile parts code. In particular w3m sets up a keymap which
-  ;; "leaks" outside the invisible region and causes strange effects
-  ;; in notmuch. We set mm-inline-text-html-with-w3m-keymap to nil to
-  ;; tell w3m not to set a keymap (so the normal notmuch-show-mode-map
-  ;; remains).
-  (let ((mm-inline-text-html-with-w3m-keymap nil))
-    (notmuch-show-insert-part-*/* msg part content-type nth depth button)))
+  (if (eq mm-text-html-renderer 'shr)
+      ;; It's easier to drive shr ourselves than to work around the
+      ;; goofy things `mm-shr' does (like irreversibly taking over
+      ;; content ID handling).
+      (notmuch-show--insert-part-text/html-shr msg part)
+    ;; Otherwise, let message-mode do the heavy lifting
+    ;;
+    ;; w3m sets up a keymap which "leaks" outside the invisible region
+    ;; and causes strange effects in notmuch. We set
+    ;; mm-inline-text-html-with-w3m-keymap to nil to tell w3m not to
+    ;; set a keymap (so the normal notmuch-show-mode-map remains).
+    (let ((mm-inline-text-html-with-w3m-keymap nil))
+      (notmuch-show-insert-part-*/* msg part content-type nth depth button))))
+
+;; These functions are used by notmuch-show--insert-part-text/html-shr
+(declare-function libxml-parse-html-region "xml.c")
+(declare-function shr-insert-document "shr")
+
+(defun notmuch-show--insert-part-text/html-shr (msg part)
+  ;; Make sure shr is loaded before we start let-binding its globals
+  (require 'shr)
+  (let ((dom (let ((process-crypto notmuch-show-process-crypto))
+	       (with-temp-buffer
+		 (insert (notmuch-get-bodypart-text msg part process-crypto))
+		 (libxml-parse-html-region (point-min) (point-max)))))
+	(shr-content-function
+	 (lambda (url)
+	   ;; shr strips the "cid:" part of URL, but doesn't
+	   ;; URL-decode it (see RFC 2392).
+	   (let ((cid (url-unhex-string url)))
+	     (first (notmuch-show--get-cid-content cid)))))
+	;; Block all external images to prevent privacy leaks and
+	;; potential attacks.  FIXME: If we block an image, offer a
+	;; button to load external images.
+	(shr-blocked-images "."))
+    (shr-insert-document dom)
+    t))
 
 (defun notmuch-show-insert-part-*/* (msg part content-type nth depth button)
   ;; This handler _must_ succeed - it is the handler of last resort.
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-01-24 21:18 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-24 21:16 [PATCH v2 0/8] Improve charset and cid: handling Austin Clements
2015-01-24 21:16 ` [PATCH v2 1/8] emacs: Track full message and part descriptor in w3m CID store Austin Clements
2015-01-24 21:16 ` [PATCH v2 2/8] emacs: Create an API for fetching parts as undecoded binary Austin Clements
2015-01-24 21:16 ` [PATCH v2 3/8] emacs: Remove broken `notmuch-get-bodypart-content' API Austin Clements
2015-01-24 21:16 ` [PATCH v2 4/8] emacs: Return unibyte strings for binary part data Austin Clements
2015-01-24 21:17 ` [PATCH v2 5/8] emacs: Support caching in notmuch-get-bodypart-{binary, text} Austin Clements
2015-01-24 21:17 ` [PATCH v2 6/8] emacs: Use generalized content caching in w3m CID code Austin Clements
2015-01-24 21:17 ` [PATCH v2 7/8] emacs: Rewrite content ID handling Austin Clements
2015-01-24 21:17 ` [PATCH v2 8/8] emacs: Support cid: references with shr renderer Austin Clements

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).