unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Michal Nazarewicz <mina86@mina86.com>
To: 31006@debbugs.gnu.org
Subject: bug#31006: [PATCH] Handle quotation marks and apostrophes in ‘sgml-quote’
Date: Sat, 31 Mar 2018 18:00:35 +0100	[thread overview]
Message-ID: <20180331170035.27048-1-mina86@mina86.com> (raw)

To be able to use text in an HTML argument, quotation marks need
to be replaced with an appropriate character reference.  Make
‘sgml-quote’ do that.

While at it, fix entities not being unquoted if they lack a closing
semicolon (e.g. ‘&amp’) occurring at the very end of a region.
Even though unlikely, make ‘sgml-quote’ handle this scenario.

* lisp/textmodes/sgml-mode.el (sgml-quote): Handle quotation marks and
apostrophes.  Match entities lacking semicolon at the end of regions.
* test/lisp/textmodes/sgml-mode-tests.el (sgml-quote-works): New test
case for ‘sgml-quote’ function.
---
 etc/NEWS                               |  6 ++++++
 lisp/textmodes/sgml-mode.el            | 26 +++++++++++++++++++-------
 test/lisp/textmodes/sgml-mode-tests.el | 30 ++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 7 deletions(-)

If everyone is fine with this I’m gonna push it in a few days.

diff --git a/etc/NEWS b/etc/NEWS
index 07f6d04a74..2da16e53fe 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -168,6 +168,12 @@ Can be controlled via the new variable 'footnote-align-to-fn-text'.
 formats (e.g. "black" => "#000000" => "rgb(0, 0, 0)") has been added,
 bound to 'C-c C-f'.
 
+** SGML mode
+
+*** 'sgml-quote' now handles double quotes and apostrophes
+when escaping text and in addition all numeric entities when
+unescaping text.
+
 ** Dired
 
 +++
diff --git a/lisp/textmodes/sgml-mode.el b/lisp/textmodes/sgml-mode.el
index f6bdfc6384..52d14bd800 100644
--- a/lisp/textmodes/sgml-mode.el
+++ b/lisp/textmodes/sgml-mode.el
@@ -1241,8 +1241,11 @@ sgml-value
 
 (defun sgml-quote (start end &optional unquotep)
   "Quote SGML text in region START ... END.
-Only &, < and > are quoted, the rest is left untouched.
-With prefix argument UNQUOTEP, unquote the region."
+Only &, <, >, ' and \" characters are quoted, the rest is left
+untouched.  This is sufficient to use quoted text as SGML argument.
+
+With prefix argument UNQUOTEP, unquote the region.  All numeric entities,
+\"amp\", \"lt\", \"gt\" and \"quot\" named entities are unquoted."
   (interactive "r\nP")
   (save-restriction
     (narrow-to-region start end)
@@ -1250,14 +1253,23 @@ sgml-quote
     (if unquotep
 	;; FIXME: We should unquote other named character references as well.
 	(while (re-search-forward
-		"\\(&\\(amp\\|\\(l\\|\\(g\\)\\)t\\)\\)[][<>&;\n\t \"%!'(),/=?]"
+		"\\(&\\(amp\\|quot\\|lt\\|gt\\|#\\([0-9]+\\|[xX][0-9a-fA-F]+\\)\\)\\)\\([][<>&;\n\t \"%!'(),/=?]\\|$\\)"
 		nil t)
-	  (replace-match (if (match-end 4) ">" (if (match-end 3) "<" "&")) t t
-			 nil (if (eq (char-before (match-end 0)) ?\;) 0 1)))
-      (while (re-search-forward "[&<>]" nil t)
+          (replace-match
+           (string
+            (or (cdr (assq (char-after (match-beginning 2))
+                           '((?a . ?&) (?q . ?\") (?l . ?<) (?g . ?>))))
+                (let ((num (match-string 3)))
+                  (if (or (eq ?x (aref num 0)) (eq ?X (aref num 0)))
+                      (string-to-number (substring num 1) 16)
+                    (string-to-number num 10)))))
+           t t nil (if (eq (char-before (match-end 0)) ?\;) 0 1)))
+      (while (re-search-forward "[&<>\"']" nil t)
 	(replace-match (cdr (assq (char-before) '((?& . "&amp;")
 						  (?< . "&lt;")
-						  (?> . "&gt;"))))
+						  (?> . "&gt;")
+                                                  (?\" . "&#34;")
+                                                  (?' . "&#39;"))))
 		       t t)))))
 
 (defun sgml-pretty-print (beg end)
diff --git a/test/lisp/textmodes/sgml-mode-tests.el b/test/lisp/textmodes/sgml-mode-tests.el
index 7ca6e676c6..6c0070ccb1 100644
--- a/test/lisp/textmodes/sgml-mode-tests.el
+++ b/test/lisp/textmodes/sgml-mode-tests.el
@@ -131,5 +131,35 @@ sgml-with-content
    (sgml-delete-tag 1)
    (should (string= "Winter is comin'" (buffer-string)))))
 
+(ert-deftest sgml-quote-works ()
+  (let ((text "Foo<Bar> \"Baz\" 'Qux'\n"))
+    (with-temp-buffer
+      ;; Back and forth transformation.
+      (insert text)
+      (sgml-quote (point-min) (point-max))
+      (should (string= "Foo&lt;Bar&gt; &#34;Baz&#34; &#39;Qux&#39;\n"
+                       (buffer-string)))
+      (sgml-quote (point-min) (point-max) t)
+      (should (string= text (buffer-string)))
+
+      ;; The same text escaped differently.
+      (erase-buffer)
+      (insert "Foo&lt;Bar&gt; &#34;Baz&quot; &#x27;Qux&#X27;\n")
+      (sgml-quote (point-min) (point-max) t)
+      (should (string= text (buffer-string)))
+
+      ;; Lack of semicolon.
+      (erase-buffer)
+      (insert "&amp&amp")
+      (sgml-quote (point-min) (point-max) t)
+      (should (string= "&&" (buffer-string)))
+
+      ;; Double quoting
+      (sgml-quote (point-min) (point-max))
+      (sgml-quote (point-min) (point-max))
+      (sgml-quote (point-min) (point-max) t)
+      (sgml-quote (point-min) (point-max) t)
+      (should (string= "&&" (buffer-string))))))
+
 (provide 'sgml-mode-tests)
 ;;; sgml-mode-tests.el ends here
-- 
2.16.2






             reply	other threads:[~2018-03-31 17:00 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-31 17:00 Michal Nazarewicz [this message]
2018-04-01  8:59 ` bug#31006: [PATCH] Handle quotation marks and apostrophes in ‘sgml-quote’ Eli Zaretskii
2018-04-07 10:20   ` Michał Nazarewicz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180331170035.27048-1-mina86@mina86.com \
    --to=mina86@mina86.com \
    --cc=31006@debbugs.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).