From mboxrd@z Thu Jan 1 00:00:00 1970 Path: main.gmane.org!not-for-mail From: mah@everybody.org (Mark A. Hershberger) Newsgroups: gmane.emacs.bugs Subject: xml.el patches for better spec compliance Date: Wed, 12 Mar 2003 15:04:11 -0600 Sender: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Message-ID: <874r68b8l0.fsf@superman.everybody.org> NNTP-Posting-Host: main.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Trace: main.gmane.org 1047503263 32660 80.91.224.249 (12 Mar 2003 21:07:43 GMT) X-Complaints-To: usenet@main.gmane.org NNTP-Posting-Date: Wed, 12 Mar 2003 21:07:43 +0000 (UTC) Original-X-From: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Wed Mar 12 22:07:41 2003 Return-path: Original-Received: from monty-python.gnu.org ([199.232.76.173]) by main.gmane.org with esmtp (Exim 3.35 #1 (Debian)) id 18tDR8-0008Ps-00 for ; Wed, 12 Mar 2003 22:06:55 +0100 Original-Received: from localhost ([127.0.0.1] helo=monty-python.gnu.org) by monty-python.gnu.org with esmtp (Exim 4.10.13) id 18tDQ2-0005JU-01 for gnu-bug-gnu-emacs@m.gmane.org; Wed, 12 Mar 2003 16:05:46 -0500 Original-Received: from list by monty-python.gnu.org with tmda-scanned (Exim 4.10.13) id 18tDPT-0004pk-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 16:05:11 -0500 Original-Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.10.13) id 18tDOj-00040k-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 16:04:26 -0500 Original-Received: from superman.everybody.org ([66.93.249.201]) by monty-python.gnu.org with esmtp (Exim 4.10.13) id 18tDOW-0003jg-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 16:04:12 -0500 Original-Received: from mah by superman.everybody.org with local (Exim 3.35 #1 (Debian)) id 18tDOV-0001z6-00 for ; Wed, 12 Mar 2003 15:04:11 -0600 Original-To: bug-gnu-emacs@gnu.org X-BeenThere: bug-gnu-emacs@gnu.org X-Mailman-Version: 2.1b5 Precedence: list List-Id: Bug reports for GNU Emacs, the Swiss army knife of text editors List-Help: List-Post: List-Subscribe: , List-Archive: List-Unsubscribe: , Errors-To: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Xref: main.gmane.org gmane.emacs.bugs:4601 X-Report-Spam: http://spam.gmane.org/gmane.emacs.bugs:4601 [This patch replaces the one sent earlier.] The following patch contains the following changes to xml.el: * All instances of "[:space:]" changed to " \t\n\r". This is because the whitespace class does not include \r, but the XML spec includes \r as whitespace. (I had previously submitted the patch to change " \t\n" to "[:space:]", but I was wrong.) See . * Replace "\r\n" and "\r" with "\n". See . * Added attribute normalization. See . * Added character references. See . diff -c -r1.16 xml.el *** xml.el 11 Mar 2003 21:57:46 -0000 1.16 --- xml.el 12 Mar 2003 21:02:24 -0000 *************** *** 184,190 **** ;; beginning of a document) ((looking-at "<\\?") (search-forward "?>" end) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) (xml-parse-tag end)) ;; Character data (CDATA) sections, in which no tag should be interpreted ((looking-at "" end) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (xml-parse-tag end)) ;; Character data (CDATA) sections, in which no tag should be interpreted ((looking-at "[:space:]]+\\)") (goto-char (match-end 1)) (let* ((case-fold-search nil) ;; XML is case-sensitive. (node-name (match-string 1)) --- 210,216 ---- ((looking-at " \t\n\r]+\\)") (goto-char (match-end 1)) (let* ((case-fold-search nil) ;; XML is case-sensitive. (node-name (match-string 1)) *************** *** 219,225 **** pos) ;; is this an empty element ? ! (if (looking-at "/[[:space:]]*>") (progn (forward-char 2) (nreverse (cons '("") children))) --- 219,225 ---- pos) ;; is this an empty element ? ! (if (looking-at "/[ \t\n\r]*>") (progn (forward-char 2) (nreverse (cons '("") children))) *************** *** 230,236 **** (forward-char 1) ;; Now check that we have the right end-tag. Note that this ;; one might contain spaces after the tag name ! (while (not (looking-at (concat ""))) (cond ((looking-at ""))) (cond ((looking-at " (point) end) ! (error "XML: end of attribute list not found before end of region")) ! ) (nreverse attlist))) ;;******************************************************************* --- 282,325 ---- The search for attributes end at the position END in the current buffer. Leaves the point on the first non-blank character after the tag." (let ((attlist ()) ! start-pos name) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) ! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n\r]*=[ \t\n\r]*") (setq name (intern (match-string 1))) (goto-char (match-end 0)) + ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize + ;; Do we have a string between quotes (or double-quotes), ;; or a simple word ? ! (if (looking-at "\"\\([^\"]*\\)\"") ! (setq start-pos (match-beginning 0)) ! (if (looking-at "'\\([^']*\\)") ! (setq start-pos (match-beginning 0)) (error "XML: Attribute values must be given between quotes"))) ;; Each attribute must be unique within a given element (if (assoc name attlist) (error "XML: each attribute must be unique within an element")) ! ;; Multiple whitespace characters should be replaced with a single one ! ;; in the attributes ! (let ((string (match-string-no-properties 1)) ! (pos 0)) ! (while (string-match "[ \t\n\r]+" string pos) ! (setq string (replace-match " " t nil string)) ! (setq pos (1+ (match-beginning 0)))) ! (push (cons name (xml-substitute-special string)) attlist)) ! ! (goto-char start-pos) ! (if (looking-at "\"\\([^\"]*\\)\"") ! (goto-char (match-end 0)) ! (if (looking-at "'\\([^']*\\)") ! (goto-char (match-end 0)))) ! ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (if (> (point) end) ! (error "XML: end of attribute list not found before end of region"))) (nreverse attlist))) ;;******************************************************************* *************** *** 318,332 **** The point must be just before the starting tag of the DTD. This follows the rule [28] in the XML specifications." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) (condition-case nil (progn ! (forward-word 1) ;; name of the document ! (goto-char (- (re-search-forward "[[:space:]]") 1)) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) (if (looking-at "\\[") ! (re-search-forward "\\][[:space:]]*>" end) (search-forward ">" end))) (error (error "XML: No end to the DTD")))) --- 336,350 ---- The point must be just before the starting tag of the DTD. This follows the rule [28] in the XML specifications." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) (condition-case nil (progn ! (forward-word 1) ! (goto-char (- (re-search-forward "[ \t\n\r]") 1)) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (if (looking-at "\\[") ! (re-search-forward "\\][ \t\n\r]*>" end) (search-forward ">" end))) (error (error "XML: No end to the DTD")))) *************** *** 334,340 **** "Parse the DTD that point is looking at. The DTD must end before the position END in the current buffer." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) --- 352,358 ---- "Parse the DTD that point is looking at. The DTD must end before the position END in the current buffer." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) *************** *** 344,350 **** type element end-pos) (goto-char (match-end 0)) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) ;; External DTDs => don't know how to handle them yet (if (looking-at "SYSTEM") --- 362,368 ---- type element end-pos) (goto-char (match-end 0)) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) ;; External DTDs => don't know how to handle them yet (if (looking-at "SYSTEM") *************** *** 355,367 **** ;; Parse the rest of the DTD (forward-char 1) ! (while (and (not (looking-at "[[:space:]]*\\]")) (<= (point) end)) (cond ;; Translation of rule [45] of XML specifications ((looking-at ! "[[:space:]]*]+\\)>") (setq element (intern (match-string-no-properties 1)) type (match-string-no-properties 2)) --- 373,385 ---- ;; Parse the rest of the DTD (forward-char 1) ! (while (and (not (looking-at "[ \t\n\r]*\\]")) (<= (point) end)) (cond ;; Translation of rule [45] of XML specifications ((looking-at ! "[ \t\n\r]*]+\\)>") (setq element (intern (match-string-no-properties 1)) type (match-string-no-properties 2)) *************** *** 369,381 **** ;; Translation of rule [46] of XML specifications (cond ! ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration (setq type 'empty)) ! ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents (setq type 'any)) ! ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) ! ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution nil) (t (error "XML: Invalid element type in the DTD"))) --- 387,399 ---- ;; Translation of rule [46] of XML specifications (cond ! ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration (setq type 'empty)) ! ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents (setq type 'any)) ! ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) ! ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t (error "XML: Invalid element type in the DTD"))) *************** *** 417,423 **** (mapcar 'xml-parse-elem-type (split-string elem ",")))) ))) ! (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string) (setq elem (match-string 1 string) modifier (match-string 2 string)))) --- 435,441 ---- (mapcar 'xml-parse-elem-type (split-string elem ",")))) ))) ! (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) (setq elem (match-string 1 string) modifier (match-string 2 string)))) *************** *** 451,456 **** --- 469,485 ---- (setq string (replace-match "'" t nil string))) (while (string-match """ string) (setq string (replace-match "\"" t nil string))) + (while (string-match "&#\\([0-9]+\\);" string) + (setq string (replace-match (char-to-ucs + (string-to-int + (match-string-no-properties 1 string))) + t nil string))) + (while (string-match "&#x\\([0-9a-fA-F]+\\);" string) + (setq string (replace-match (char-to-ucs + (hex-string-to-number + (match-string-no-properties 1 string))) + t nil string))) + ;; This goes last so it doesn't confuse the matches above. (while (string-match "&" string) (setq string (replace-match "&" t nil string)))