unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: ozzloy <ozzloy@gmail.com>
To: Eli Zaretskii <eliz@gnu.org>
Cc: 63941@debbugs.gnu.org
Subject: bug#63941: [PATCH] ; always CRLF before non-first boundary in multipart form
Date: Sun, 18 Jun 2023 16:23:23 -0700	[thread overview]
Message-ID: <CACT2Oni-ABRW4kUVLZgNqZa3_X04+60=wzefARWW0N3dQQqM3w@mail.gmail.com> (raw)
In-Reply-To: <CACT2Onh1KjdmiiMD+fu87=RB9j0ZyYmySomRqmjWtOwFcCKN8Q@mail.gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 911 bytes --]

> who knows where it is used?

As far as I can tell, it is only ever used by =(eww-submit ...)= from
=lisp/net/eww.el=.  That's the only place it's used in all of emacs,
and every reference I can find on the web.

I've tested the heck out of it now, and also used firefox, chromium,
and curl to generate http messages for comparison.

Those tests of the different versions of
=mm-url-encode-multipart-form-data=, as well as the http messages
generated by firefox, chromium, and curl can be seen here

https://git.sr.ht/~ozzloy/emacs-bug-63941/tree/53a7949a5db21c456c1da3b4add29343c3d02137/item/mm-url-tests.el

The patch I have attached to this email generates output that matches
firefox, chromium, and curl.  It also includes a bunch of tests for
the included version of =mm-url-encode-multipart-form-data=

If there's some change that would make the patch a better fit, let me
know.  I'm happy to modify it.

[-- Attachment #1.2: Type: text/html, Size: 1158 bytes --]

[-- Attachment #2: 0001-always-CRLF-before-non-first-boundary-in-mulitpart-f.patch --]
[-- Type: text/x-patch, Size: 12293 bytes --]

From c73ccda90623519434f8ec2c700adf70ac1d6a00 Mon Sep 17 00:00:00 2001
From: Daniel Watson <ozzloy@gmail.com>
Date: Sun, 18 Jun 2023 15:14:32 -0700
Subject: [PATCH] always CRLF before non-first boundary in mulitpart form

; Insert CRLF after file contents and before boundary,
; in accordance with the syntax description here
; rfc2046 section 5.1.1.
; The CRLF is attached to the boundary, and not the preceding part.
---
 lisp/gnus/mm-url.el            | 156 +++++++++++++++++++-------
 test/lisp/gnus/mm-url-tests.el | 194 +++++++++++++++++++++++++++++++++
 2 files changed, 308 insertions(+), 42 deletions(-)
 create mode 100644 test/lisp/gnus/mm-url-tests.el

diff --git a/lisp/gnus/mm-url.el b/lisp/gnus/mm-url.el
index 11847a79f17..f3eecbf18ed 100644
--- a/lisp/gnus/mm-url.el
+++ b/lisp/gnus/mm-url.el
@@ -394,54 +394,126 @@ mm-url-encode-www-form-urlencoded
 
 (autoload 'mml-compute-boundary "mml")
 
+(defun mm-url--encode-multipart-form-file (file-metadata)
+  "Return a list of lines used to represent an http message with a
+file and its contents, as described in FILE-METADATA
+
+* example
+** input
+  '((\"name\"         . \"a\")
+    (\"filename\"     . \"b\")
+    (\"filedata\"     . \"c\")
+    (\"content-type\" . \"d\"))
+** output
+  '(\"Content-Disposition: form-data; name=\\\"a\\\"; filename=\\\"b\\\"\"
+    \"Content-Type: d; charset=utf-8\"
+    \"Content-Transfer-Encoding: binary\"
+    \"\"    ;; completely blank line after all the headers
+    \"c\")   ;; entire file content"
+  (cl-flet ((get-value (key default)
+		       (alist-get
+			key file-metadata default nil #'string=)))
+    (let ((filedata     (get-value     "filedata"          nil))
+	  (name         (get-value         "name"       "file"))
+	  (filename     (get-value     "filename"       "file"))
+	  (content-type (get-value "content-type" "text/plain")))
+      (list
+       (format "Content-Disposition: form-data; name=%S; filename=%S"
+	       name filename)
+       (format "Content-Type: %s; charset=utf-8" content-type)
+       "Content-Transfer-Encoding: binary"
+       ""
+       (cl-typecase filedata
+	 (integer (number-to-string filedata))
+	 (string filedata))))))
+
+(defun mm-url--encode-multipart-form-name-value (name value)
+  "Return NAME and VALUE as a list of lines used for creating
+multipart/form-data http message.
+
+* example
+** input
+  '(\"a\" . \"b\")
+** output
+  '(\"Content-Disposition: form-data; name=\\\"a\\\"\"
+    \"\"
+    \"b\")"
+  (list (format "Content-Disposition: form-data; name=%S" name)
+	""
+	value))
+
+(defun mm-url--encode-multipart-form-submit ()
+  "Return list of lines for submit message."
+  (list "Content-Disposition: form-data; name=\"submit\""
+	""
+	"Submit"))
+
+(defun mm-url--encode-multipart-form-datum (separator datum)
+  "Return list of lines in one segment of a multipart form.
+
+* example
+** input
+  \"--BOUNDARY\" '(\"submit\")
+** output
+  '(\"--BOUNDARY\"
+    \"Content-Disposition: form-data; name=\\\"submit\\\"\"
+    \"\"
+    \"Submit\")"
+  (let ((name  (car datum))
+	(value (cdr datum)))
+    (cons separator
+	  (pcase name
+	    ("file"   (mm-url--encode-multipart-form-file value))
+	    ("submit" (mm-url--encode-multipart-form-submit))
+	    (_        (mm-url--encode-multipart-form-name-value
+		       name value))))))
+
+(defun mm-url--encode-multipart-form-lines (boundary data)
+  "return a list of lines for entire multipart form
+
+* examples
+** input 0
+  \"BOUNDARY\" '((\"submit\"))
+** output 0
+  '(\"--BOUNDARY\"
+    \"Content-Disposition: form-data; name=\\\"submit\\\"\"
+    \"\"
+    \"Submit\"
+    \"--BOUNDARY--\"
+    \"\")
+** input 1
+  \"BOUNDARY\" '((\"submit\") (\"a\" . \"b\"))
+** output 1
+  '(\"--BOUNDARY\"
+    \"Content-Disposition: form-data; name=\\\"submit\\\"\"
+    \"\"
+    \"Submit\"
+    \"--BOUNDARY\"
+    \"Content-Disposition: form-data; name=\\\"a\\\"\"
+    \"\"
+    \"b\"
+    \"--BOUNDARY--\"
+    \"\")"
+  (let ((separator (concat "--" boundary))
+	(ending    (concat "--" boundary "--")))
+    (append (mapcan (apply-partially 'mm-url--encode-multipart-form-datum
+				     separator)
+		    data)
+	    (list ending ""))))
+
 (defun mm-url-encode-multipart-form-data (data &optional boundary)
   "Return DATA encoded in multipart/form-data.
 DATA is a list where the elements can have the following form:
   (\"NAME\" . \"VALUE\")
   (\"submit\")
-  (\"file\" . ((\"name\" . \"NAME\")
-             (\"filename\" . \"FILENAME\")
-             (\"content-type\" . \"CONTENT-TYPE\")
-             (\"filedata\" . \"FILEDATA\")))
+  (\"file\" . ((\"name\"        . \"NAME\")
+	      (\"filename\"     . \"FILENAME\")
+	      (\"content-type\" . \"CONTENT-TYPE\")
+	      (\"filedata\"     . \"FILEDATA\")))
 Lowercase strings above are literals and uppercase are not."
-  ;; RFC1867
-  ;; Get a good boundary.
-  (unless boundary
-    (setq boundary (mml-compute-boundary '())))
-  (with-temp-buffer
-    (set-buffer-multibyte nil)
-    (dolist (elem data)
-      (let ((name (car elem))
-	    (value (cdr elem)))
-	(insert "--" boundary "\r\n")
-	(cond
-	 ((equal name "file")
-	  (insert (format
-		   "Content-Disposition: form-data; name=%S; filename=%S\r\n"
-		   (or (cdr (assoc "name" value)) name)
-		   (cdr (assoc "filename" value))))
-	  (insert "Content-Transfer-Encoding: binary\r\n")
-	  (insert (format "Content-Type: %s\r\n\r\n"
-			  (or (cdr (assoc "content-type" value))
-			      "text/plain")))
-	  (let ((filedata (cdr (assoc "filedata" value))))
-	    (cond
-	     ((stringp filedata)
-	      (insert filedata))
-	     ;; How can this possibly be useful?
-	     ((integerp filedata)
-	      (insert (number-to-string filedata))))))
-	 ((equal name "submit")
-	  (insert
-	   "Content-Disposition: form-data; name=\"submit\"\r\n\r\nSubmit\r\n"))
-	 (t
-	  (insert (format "Content-Disposition: form-data; name=%S\r\n\r\n"
-			  name))
-	  (insert value)))
-	(unless (bolp)
-	  (insert "\r\n"))))
-    (insert "--" boundary "--\r\n")
-    (buffer-string)))
+  (let* ((boundary (or boundary (mml-compute-boundary '())))
+	 (lines    (mm-url--encode-multipart-form-lines boundary data)))
+    (mapconcat 'identity lines "\r\n")))
 
 (defun mm-url-remove-markup ()
   "Remove all HTML markup, leaving just plain text."
diff --git a/test/lisp/gnus/mm-url-tests.el b/test/lisp/gnus/mm-url-tests.el
new file mode 100644
index 00000000000..0c31a86146b
--- /dev/null
+++ b/test/lisp/gnus/mm-url-tests.el
@@ -0,0 +1,194 @@
+;;; mm-url-tests.el ---  -*- lexical-binding:t -*-
+
+;; Copyright (C) 2021-2023 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'ert)
+(require 'mm-url)
+
+
+(ert-deftest test-mm-url--encode-multipart-form-file ()
+  (should
+   (equal
+    (mm-url--encode-multipart-form-file '(("name"         . "a")
+					  ("filename"     . "b")
+					  ("filedata"     . "c\n")
+					  ("content-type" . "d")))
+    '("Content-Disposition: form-data; name=\"a\"; filename=\"b\""
+      "Content-Type: d; charset=utf-8"
+      "Content-Transfer-Encoding: binary"
+      ""    ;; completely blank line to separate headers from content
+      "c\n"))))
+
+(ert-deftest test-mm-url--encode-multipart-form-name-value ()
+  (should (equal (mm-url--encode-multipart-form-name-value "a" "b")
+		 '("Content-Disposition: form-data; name=\"a\""
+		   ""
+		   "b"))))
+
+(ert-deftest test-mm-url--encode-multipart-form-submit ()
+  (should
+   (equal (mm-url--encode-multipart-form-submit)
+	  (list "Content-Disposition: form-data; name=\"submit\""
+		""
+		"Submit"))))
+
+(ert-deftest test-mm-url--encode-multipart-form-datum ()
+  (should
+   (equal
+    (mm-url--encode-multipart-form-datum "--BOUNDARY" '("submit"))
+    '("--BOUNDARY"
+      "Content-Disposition: form-data; name=\"submit\""
+      ""
+      "Submit"))))
+
+(ert-deftest test-mm-url-encode-multipart-form-data-nil ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data '() "BOUNDARY")
+    "--BOUNDARY--\r\n")))
+
+(ert-deftest test-mm-url-encode-multipart-form-data--name-value ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data
+     '(("name" . "value")) "BOUNDARY")
+    (concat "--BOUNDARY\r\n"
+	    "Content-Disposition: form-data; name=\"name\"\r\n"
+	    "\r\n"
+	    "value\r\n"
+	    "--BOUNDARY--\r\n"))))
+
+(ert-deftest test-mm-url-encode-multipart-form-data--submit ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data '(("submit")) "BOUNDARY")
+    (concat "--BOUNDARY\r\n"
+	    "Content-Disposition: form-data; name=\"submit\"\r\n"
+	    "\r\n"
+	    "Submit\r\n"
+	    "--BOUNDARY--\r\n"))))
+
+(ert-deftest test-mm-url-encode-multipart-form-data--file ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data
+     '(("file" . (("name"         . "a")
+		  ("filename"     . "b")
+		  ("content-type" . "c")
+		  ("filedata"     . "d\n"))))
+     "BOUNDARY")
+    (concat
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"a\"; filename=\"b\"\r\n"
+     "Content-Type: c; charset=utf-8\r\n"
+     "Content-Transfer-Encoding: binary\r\n"
+     "\r\n"
+
+     ;; file content
+     "d\n"
+
+     ;; rfc 2046 section 5
+     ;; https://www.rfc-editor.org/rfc/rfc2046#section-5
+     ;; "The boundary delimiter MUST occur at the beginning of a
+     ;; line, i.e., following a CRLF, and the initial CRLF is
+     ;; considered to be attached to the boundary delimiter line
+     ;; rather than part of the preceding part."
+     "\r\n"
+
+     "--BOUNDARY--\r\n"))))
+
+(ert-deftest test-mm-url-encode-multipart-form-data--all-parts ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data
+     '(("name" . "value")
+       ("submit")
+       ("file" . (("name"         . "a")
+		  ("filename"     . "b")
+		  ("content-type" . "c")
+		  ("filedata"     . "d"))))
+     "BOUNDARY")
+    (concat
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"name\"\r\n"
+     "\r\n"
+     "value\r\n"
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"submit\"\r\n"
+     "\r\n"
+     "Submit\r\n"
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"a\"; filename=\"b\"\r\n"
+     "Content-Type: c; charset=utf-8\r\n"
+     "Content-Transfer-Encoding: binary\r\n"
+     "\r\n"
+
+     ;; file content
+     "d"
+
+     ;; rfc 2046 section 5
+     ;; the \r\n is attached to the boundary below it
+     "\r\n"
+     "--BOUNDARY--\r\n"))))
+
+(ert-deftest test-mm-url-encode-multipart-form-data-two-files ()
+  (should
+   (string=
+    (mm-url-encode-multipart-form-data
+     '(("file" . (("name"         . "a")
+		  ("filename"     . "b")
+		  ("content-type" . "c")
+		  ("filedata"     . "d\n")))
+       ("file" . (("name"         . "e")
+		  ("filename"     . "f")
+		  ("content-type" . "g")
+		  ("filedata"     . "h\n"))))
+     "BOUNDARY")
+    (concat
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"a\"; filename=\"b\"\r\n"
+     "Content-Type: c; charset=utf-8\r\n"
+     "Content-Transfer-Encoding: binary\r\n"
+     "\r\n"
+
+     ;; file content
+     "d\n"
+
+     ;; rfc2046 section 5
+     ;; the \r\n is attached to the boundary below it
+     "\r\n"
+     "--BOUNDARY\r\n"
+     "Content-Disposition: form-data; name=\"e\"; filename=\"f\"\r\n"
+     "Content-Type: g; charset=utf-8\r\n"
+     "Content-Transfer-Encoding: binary\r\n"
+     "\r\n"
+
+     ;; file content
+     "h\n"
+
+     ;; rfc 2046 section 5
+     ;; the \r\n is attached to the boundary below it
+     "\r\n"
+     "--BOUNDARY--\r\n"))))
+
+
+;;; mm-url-tests.el ends here
-- 
2.34.1


  reply	other threads:[~2023-06-18 23:23 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-07  5:25 bug#63941: [PATCH] ; always CRLF before non-first boundary in multipart form ozzloy
2023-06-07 12:30 ` Eli Zaretskii
2023-06-08  2:48   ` ozzloy
2023-06-08  6:09     ` Eli Zaretskii
2023-06-08  6:43       ` ozzloy
2023-06-08  6:52         ` ozzloy
2023-06-10  9:42           ` Eli Zaretskii
2023-06-11  1:38             ` ozzloy
2023-06-18 23:23               ` ozzloy [this message]
2023-06-19 16:13                 ` Eli Zaretskii
2023-06-22 16:49                   ` ozzloy
2023-06-22 18:25                     ` ozzloy
2023-06-22 18:29                       ` Eli Zaretskii
2023-06-23  8:22                         ` ozzloy
2023-07-18 19:04     ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2023-07-21  9:04       ` ozzloy
2023-08-29  0:28         ` ozzloy
2023-12-02 15:03           ` ozzloy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CACT2Oni-ABRW4kUVLZgNqZa3_X04+60=wzefARWW0N3dQQqM3w@mail.gmail.com' \
    --to=ozzloy@gmail.com \
    --cc=63941@debbugs.gnu.org \
    --cc=eliz@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).