From mboxrd@z Thu Jan 1 00:00:00 1970 Path: main.gmane.org!not-for-mail From: mah@everybody.org (Mark A. Hershberger) Newsgroups: gmane.emacs.bugs Subject: xml.el patches for better spec compliance Date: Wed, 12 Mar 2003 13:42:43 -0600 Sender: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Message-ID: <8765qobccs.fsf@superman.everybody.org> NNTP-Posting-Host: main.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Trace: main.gmane.org 1047498418 8577 80.91.224.249 (12 Mar 2003 19:46:58 GMT) X-Complaints-To: usenet@main.gmane.org NNTP-Posting-Date: Wed, 12 Mar 2003 19:46:58 +0000 (UTC) Original-X-From: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Wed Mar 12 20:46:53 2003 Return-path: Original-Received: from monty-python.gnu.org ([199.232.76.173]) by main.gmane.org with esmtp (Exim 3.35 #1 (Debian)) id 18tCBh-0002De-00 for ; Wed, 12 Mar 2003 20:46:53 +0100 Original-Received: from localhost ([127.0.0.1] helo=monty-python.gnu.org) by monty-python.gnu.org with esmtp (Exim 4.10.13) id 18tCAy-0008Qg-01 for gnu-bug-gnu-emacs@m.gmane.org; Wed, 12 Mar 2003 14:46:08 -0500 Original-Received: from list by monty-python.gnu.org with tmda-scanned (Exim 4.10.13) id 18tC8F-0007O6-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 14:43:19 -0500 Original-Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.10.13) id 18tC82-0007EG-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 14:43:08 -0500 Original-Received: from superman.everybody.org ([66.93.249.201]) by monty-python.gnu.org with esmtp (Exim 4.10.13) id 18tC7h-0006td-00 for bug-gnu-emacs@gnu.org; Wed, 12 Mar 2003 14:42:45 -0500 Original-Received: from mah by superman.everybody.org with local (Exim 3.35 #1 (Debian)) id 18tC7f-0001l0-00 for ; Wed, 12 Mar 2003 13:42:43 -0600 Original-To: bug-gnu-emacs@gnu.org X-BeenThere: bug-gnu-emacs@gnu.org X-Mailman-Version: 2.1b5 Precedence: list List-Id: Bug reports for GNU Emacs, the Swiss army knife of text editors List-Help: List-Post: List-Subscribe: , List-Archive: List-Unsubscribe: , Errors-To: bug-gnu-emacs-bounces+gnu-bug-gnu-emacs=m.gmane.org@gnu.org Xref: main.gmane.org gmane.emacs.bugs:4600 X-Report-Spam: http://spam.gmane.org/gmane.emacs.bugs:4600 The following patch contains the following changes to xml.el: * All instances of "[:space:]" changed to " \t\n\r". This is because the whitespace class does not include \r, but the XML spec includes \r as whitespace. (I had previously submitted the patch to change " \t\n" to "[:space:]", but I was wrong.) See . * Replace "\r\n" and "\r" with "\n". See . * Added attribute normalization. See . * Added character references. See . diff -c -r1.16 xml.el *** xml.el 11 Mar 2003 21:57:46 -0000 1.16 --- xml.el 12 Mar 2003 19:36:01 -0000 *************** *** 184,190 **** ;; beginning of a document) ((looking-at "<\\?") (search-forward "?>" end) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) (xml-parse-tag end)) ;; Character data (CDATA) sections, in which no tag should be interpreted ((looking-at "" end) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (xml-parse-tag end)) ;; Character data (CDATA) sections, in which no tag should be interpreted ((looking-at "[:space:]]+\\)") (goto-char (match-end 1)) (let* ((case-fold-search nil) ;; XML is case-sensitive. (node-name (match-string 1)) --- 210,216 ---- ((looking-at " \t\n\r]+\\)") (goto-char (match-end 1)) (let* ((case-fold-search nil) ;; XML is case-sensitive. (node-name (match-string 1)) *************** *** 219,225 **** pos) ;; is this an empty element ? ! (if (looking-at "/[[:space:]]*>") (progn (forward-char 2) (nreverse (cons '("") children))) --- 219,225 ---- pos) ;; is this an empty element ? ! (if (looking-at "/[ \t\n\r]*>") (progn (forward-char 2) (nreverse (cons '("") children))) *************** *** 230,236 **** (forward-char 1) ;; Now check that we have the right end-tag. Note that this ;; one might contain spaces after the tag name ! (while (not (looking-at (concat ""))) (cond ((looking-at ""))) (cond ((looking-at " (point) end) ! (error "XML: end of attribute list not found before end of region")) ! ) (nreverse attlist))) ;;******************************************************************* --- 282,324 ---- The search for attributes end at the position END in the current buffer. Leaves the point on the first non-blank character after the tag." (let ((attlist ()) ! start-pos name) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) ! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n\r]*=[ \t\n\r]*") (setq name (intern (match-string 1))) (goto-char (match-end 0)) + ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize + ;; Do we have a string between quotes (or double-quotes), ;; or a simple word ? ! (if (looking-at "\"\\([^\"]*\\)\"") ! (setq start-pos (match-beginning 0)) ! (if (looking-at "'\\([^']*\\)") ! (setq start-pos (match-beginning 0)) (error "XML: Attribute values must be given between quotes"))) ;; Each attribute must be unique within a given element (if (assoc name attlist) (error "XML: each attribute must be unique within an element")) ! ;; Multiple whitespace characters should be replaced with a single one ! ;; in the attributes ! (let ((string (match-string-no-properties 1)) ! (pos 0)) ! (while (string-match "[ \t\n\r]+" string pos) ! (setq string (replace-match " " t nil string)) ! (setq pos (1+ (match-beginning 0)))) ! (push (cons name (xml-substitute-special string)) attlist)) ! ! (goto-char start-pos) ! (if (looking-at "\"\\([^\"]*\\)\"") ! (goto-char (match-end 0)) ! (if (looking-at "'\\([^']*\\)") ! (goto-char (match-end 0)))) ! (if (> (point) end) ! (error "XML: end of attribute list not found before end of region"))) (nreverse attlist))) ;;******************************************************************* *************** *** 318,332 **** The point must be just before the starting tag of the DTD. This follows the rule [28] in the XML specifications." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) (condition-case nil (progn (forward-word 1) ;; name of the document ! (goto-char (- (re-search-forward "[[:space:]]") 1)) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) (if (looking-at "\\[") ! (re-search-forward "\\][[:space:]]*>" end) (search-forward ">" end))) (error (error "XML: No end to the DTD")))) --- 335,349 ---- The point must be just before the starting tag of the DTD. This follows the rule [28] in the XML specifications." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) (condition-case nil (progn (forward-word 1) ;; name of the document ! (goto-char (- (re-search-forward "[ \t\n\r]") 1)) ! (goto-char (- (re-search-forward "[^\t\n\r]") 1)) (if (looking-at "\\[") ! (re-search-forward "\\][ \t\n\r]*>" end) (search-forward ">" end))) (error (error "XML: No end to the DTD")))) *************** *** 334,340 **** "Parse the DTD that point is looking at. The DTD must end before the position END in the current buffer." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) --- 351,357 ---- "Parse the DTD that point is looking at. The DTD must end before the position END in the current buffer." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) *************** *** 344,350 **** type element end-pos) (goto-char (match-end 0)) ! (goto-char (- (re-search-forward "[^[:space:]]") 1)) ;; External DTDs => don't know how to handle them yet (if (looking-at "SYSTEM") --- 361,367 ---- type element end-pos) (goto-char (match-end 0)) ! (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) ;; External DTDs => don't know how to handle them yet (if (looking-at "SYSTEM") *************** *** 355,367 **** ;; Parse the rest of the DTD (forward-char 1) ! (while (and (not (looking-at "[[:space:]]*\\]")) (<= (point) end)) (cond ;; Translation of rule [45] of XML specifications ((looking-at ! "[[:space:]]*]+\\)>") (setq element (intern (match-string-no-properties 1)) type (match-string-no-properties 2)) --- 372,384 ---- ;; Parse the rest of the DTD (forward-char 1) ! (while (and (not (looking-at "[ \t\n\r]*\\]")) (<= (point) end)) (cond ;; Translation of rule [45] of XML specifications ((looking-at ! "[ \t\n\r]*]+\\)>") (setq element (intern (match-string-no-properties 1)) type (match-string-no-properties 2)) *************** *** 369,381 **** ;; Translation of rule [46] of XML specifications (cond ! ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration (setq type 'empty)) ! ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents (setq type 'any)) ! ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) ! ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution nil) (t (error "XML: Invalid element type in the DTD"))) --- 386,398 ---- ;; Translation of rule [46] of XML specifications (cond ! ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration (setq type 'empty)) ! ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents (setq type 'any)) ! ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) ! ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t (error "XML: Invalid element type in the DTD"))) *************** *** 417,423 **** (mapcar 'xml-parse-elem-type (split-string elem ",")))) ))) ! (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string) (setq elem (match-string 1 string) modifier (match-string 2 string)))) --- 434,440 ---- (mapcar 'xml-parse-elem-type (split-string elem ",")))) ))) ! (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) (setq elem (match-string 1 string) modifier (match-string 2 string)))) *************** *** 451,456 **** --- 468,484 ---- (setq string (replace-match "'" t nil string))) (while (string-match """ string) (setq string (replace-match "\"" t nil string))) + (while (string-match "&#\\([0-9]+\\);" string) + (setq string (replace-match (char-to-string + (string-to-int + (match-string-no-properties 1 string))) + t nil string))) + (while (string-match "&#x\\([0-9a-fA-F]+\\);" string) + (setq string (replace-match (char-to-string + (hex-string-to-number + (match-string-no-properties 1 string))) + t nil string))) + ;; This goes last so it doesn't confuse the matches above. (while (string-match "&" string) (setq string (replace-match "&" t nil string)))