* xml.el patches for better spec compliance
@ 2003-03-12 21:04 Mark A. Hershberger
2003-03-13 10:31 ` Juanma Barranquero
[not found] ` <mailman.3159.1047551561.21513.bug-gnu-emacs@gnu.org>
0 siblings, 2 replies; 7+ messages in thread
From: Mark A. Hershberger @ 2003-03-12 21:04 UTC (permalink / raw)
[This patch replaces the one sent earlier.]
The patch below makes the following changes to xml.el:
* All instances of "[:space:]" changed to " \t\n\r". This is
because the whitespace class does not include \r, but the XML
spec includes \r as whitespace. (I had previously submitted
the patch to change " \t\n" to "[:space:]", but I was wrong.)
See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-common-syn>.
* Replace "\r\n" and "\r" with "\n". See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends>.
* Added attribute normalization. See
<http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize>.
* Added character references. See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-references>.
diff -c -r1.16 xml.el
*** xml.el 11 Mar 2003 21:57:46 -0000 1.16
--- xml.el 12 Mar 2003 21:02:24 -0000
***************
*** 184,190 ****
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
--- 184,190 ----
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
***************
*** 198,204 ****
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
--- 198,204 ----
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
***************
*** 210,216 ****
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/>[:space:]]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
--- 210,216 ----
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/> \t\n\r]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
***************
*** 219,225 ****
pos)
;; is this an empty element ?
! (if (looking-at "/[[:space:]]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
--- 219,225 ----
pos)
;; is this an empty element ?
! (if (looking-at "/[ \t\n\r]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
***************
*** 230,236 ****
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[[:space:]]*>")))
(cond
((looking-at "</")
(error (concat
--- 230,236 ----
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[ \t\n\r]*>")))
(cond
((looking-at "</")
(error (concat
***************
*** 248,259 ****
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string (no newline characters)
! ;; Not done, since as per XML specifications, the XML processor
! ;; should always pass the whole string to the application.
! ;; (while (string-match "\\s +" string pos)
! ;; (setq string (replace-match " " t t string))
! ;; (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
--- 248,261 ----
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string. As per XML
! ;; specifications, the XML processor should
! ;; always pass the whole string to the
! ;; application. But \r's should be replaced:
! ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
! (while (string-match "\r\n?" string pos)
! (setq string (replace-match "\n" t t string))
! (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
***************
*** 280,307 ****
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! name)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[[:space:]]*=[[:space:]]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (unless (looking-at "\"\\([^\"]*\\)\"")
! (unless (looking-at "'\\([^']*\\)'")
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! (push (cons name (match-string-no-properties 1)) attlist)
! (goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region"))
! )
(nreverse attlist)))
;;*******************************************************************
--- 282,325 ----
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! start-pos name)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n\r]*=[ \t\n\r]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
+ ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
+
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (if (looking-at "\"\\([^\"]*\\)\"")
! (setq start-pos (match-beginning 0))
! (if (looking-at "'\\([^']*\\)")
! (setq start-pos (match-beginning 0))
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! ;; Multiple whitespace characters should be replaced with a single one
! ;; in the attributes
! (let ((string (match-string-no-properties 1))
! (pos 0))
! (while (string-match "[ \t\n\r]+" string pos)
! (setq string (replace-match " " t nil string))
! (setq pos (1+ (match-beginning 0))))
! (push (cons name (xml-substitute-special string)) attlist))
!
! (goto-char start-pos)
! (if (looking-at "\"\\([^\"]*\\)\"")
! (goto-char (match-end 0))
! (if (looking-at "'\\([^']*\\)")
! (goto-char (match-end 0))))
!
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region")))
(nreverse attlist)))
;;*******************************************************************
***************
*** 318,332 ****
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[[:space:]]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
! (forward-word 1) ;; name of the document
! (goto-char (- (re-search-forward "[[:space:]]") 1))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][[:space:]]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
--- 336,350 ----
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[ \t\n\r]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
! (forward-word 1)
! (goto-char (- (re-search-forward "[ \t\n\r]") 1))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][ \t\n\r]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
***************
*** 334,340 ****
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
--- 352,358 ----
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
***************
*** 344,350 ****
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
--- 362,368 ----
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
***************
*** 355,367 ****
;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[[:space:]]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[[:space:]]*<!ELEMENT[[:space:]]+\\([a-zA-Z0-9.%;]+\\)[[:space:]]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
--- 373,385 ----
;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[ \t\n\r]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[ \t\n\r]*<!ELEMENT[ \t\n\r]+\\([a-zA-Z0-9.%;]+\\)[ \t\n\r]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
***************
*** 369,381 ****
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
--- 387,399 ----
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
***************
*** 417,423 ****
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string)
(setq elem (match-string 1 string)
modifier (match-string 2 string))))
--- 435,441 ----
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
(setq elem (match-string 1 string)
modifier (match-string 2 string))))
***************
*** 451,456 ****
--- 469,485 ----
(setq string (replace-match "'" t nil string)))
(while (string-match "&quot;" string)
(setq string (replace-match "\"" t nil string)))
+ (while (string-match "&#\\([0-9]+\\);" string)
+ (setq string (replace-match (char-to-ucs
+ (string-to-int
+ (match-string-no-properties 1 string)))
+ t nil string)))
+ (while (string-match "&#x\\([0-9a-fA-F]+\\);" string)
+ (setq string (replace-match (char-to-ucs
+ (hex-string-to-number
+ (match-string-no-properties 1 string)))
+ t nil string)))
+
;; This goes last so it doesn't confuse the matches above.
(while (string-match "&amp;" string)
(setq string (replace-match "&" t nil string)))
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: xml.el patches for better spec compliance
2003-03-12 21:04 xml.el patches for better spec compliance Mark A. Hershberger
@ 2003-03-13 10:31 ` Juanma Barranquero
[not found] ` <mailman.3159.1047551561.21513.bug-gnu-emacs@gnu.org>
1 sibling, 0 replies; 7+ messages in thread
From: Juanma Barranquero @ 2003-03-13 10:31 UTC (permalink / raw)
Cc: bug-gnu-emacs
On Wed, 12 Mar 2003 15:04:11 -0600, mah@everybody.org (Mark A. Hershberger) wrote:
> [This patch replaces the one sent earlier.]
[...]
> *** 451,456 ****
> --- 469,485 ----
> (setq string (replace-match "'" t nil string)))
> (while (string-match "&quot;" string)
> (setq string (replace-match "\"" t nil string)))
> + (while (string-match "&#\\([0-9]+\\);" string)
> + (setq string (replace-match (char-to-ucs
> + (string-to-int
> + (match-string-no-properties 1 string)))
> + t nil string)))
> + (while (string-match "&#x\\([0-9a-fA-F]+\\);" string)
> + (setq string (replace-match (char-to-ucs
> + (hex-string-to-number
> + (match-string-no-properties 1 string)))
> + t nil string)))
> +
> ;; This goes last so it doesn't confuse the matches above.
> (while (string-match "&amp;" string)
> (setq string (replace-match "&" t nil string)))
Are `char-to-ucs' and `hex-string-to-number' XEmacs functions?
/L/e/k/t/u
^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <mailman.3159.1047551561.21513.bug-gnu-emacs@gnu.org>]
* Re: xml.el patches for better spec compliance
[not found] ` <mailman.3159.1047551561.21513.bug-gnu-emacs@gnu.org>
@ 2003-03-13 17:03 ` Mark A. Hershberger
2003-03-14 17:03 ` Mark A. Hershberger
0 siblings, 1 reply; 7+ messages in thread
From: Mark A. Hershberger @ 2003-03-13 17:03 UTC (permalink / raw)
Cc: bug-gnu-emacs
Juanma Barranquero <lektu@terra.es> writes:
> Are `char-to-ucs' and `hex-string-to-number' XEmacs functions?
No. I'm running GNU Emacs on Debian with mule-ucs. The function
comes from the mule-ucs package.
Which brings me to the question I have today:
XML uses ISO-10646 (UCS) as the character set. Since Emacs21
has support for Unicode, how do I go from a UCS code-point to
something I can use in a string?
Mark.
--
You are a mystery as deep as the sea; the more I search, the more
I find, and the more I find the more I search for you.
-- St. Catherine of Siena
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: xml.el patches for better spec compliance
2003-03-13 17:03 ` Mark A. Hershberger
@ 2003-03-14 17:03 ` Mark A. Hershberger
0 siblings, 0 replies; 7+ messages in thread
From: Mark A. Hershberger @ 2003-03-14 17:03 UTC (permalink / raw)
mah@everybody.org (Mark A. Hershberger) writes:
> XML uses ISO-10646 (UCS) as the character set. Since Emacs21
> has support for Unicode, how do I go from a UCS code-point to
> something I can use in a string?
Answering my own query:
international/mule.el includes "decode-char" which should be used
for this.
--
You are a mystery as deep as the sea; the more I search, the more
I find, and the more I find the more I search for you.
-- St. Catherine of Siena
^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <mailman.3142.1047498362.21513.bug-gnu-emacs@gnu.org>]
* Re: xml.el patches for better spec compliance
[not found] <mailman.3142.1047498362.21513.bug-gnu-emacs@gnu.org>
@ 2003-03-14 17:56 ` Mark A. Hershberger
2003-03-14 23:51 ` Juanma Barranquero
0 siblings, 1 reply; 7+ messages in thread
From: Mark A. Hershberger @ 2003-03-14 17:56 UTC (permalink / raw)
2003-03-14 Mark A. Hershberger <mah@everybody.org>
* xml.el (xml-ucs-to-string): New function to convert Unicode
codepoints to strings. Uses decode-char (mule.el) if
available.
(xml-parse-tag, xml-parse-attlist, xml-skip-dtd, xml-parse-dtd)
(xml-parse-elem-type): Use ' \t\n\r' instead of '[:space:]'.
(xml-parse-attlist): Added attribute normalization.
(xml-parse-tag): Replace "\r\n" and "\r" with "\n".
diff -c -r1.16 xml.el
*** xml.el 11 Mar 2003 21:57:46 -0000 1.16
--- xml.el 14 Mar 2003 17:37:07 -0000
***************
*** 184,190 ****
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
--- 184,190 ----
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
***************
*** 198,204 ****
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
--- 198,204 ----
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
***************
*** 210,216 ****
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/>[:space:]]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
--- 210,216 ----
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/> \t\n\r]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
***************
*** 219,225 ****
pos)
;; is this an empty element ?
! (if (looking-at "/[[:space:]]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
--- 219,225 ----
pos)
;; is this an empty element ?
! (if (looking-at "/[ \t\n\r]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
***************
*** 230,236 ****
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[[:space:]]*>")))
(cond
((looking-at "</")
(error (concat
--- 230,236 ----
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[ \t\n\r]*>")))
(cond
((looking-at "</")
(error (concat
***************
*** 248,259 ****
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string (no newline characters)
! ;; Not done, since as per XML specifications, the XML processor
! ;; should always pass the whole string to the application.
! ;; (while (string-match "\\s +" string pos)
! ;; (setq string (replace-match " " t t string))
! ;; (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
--- 248,261 ----
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string. As per XML
! ;; specifications, the XML processor should
! ;; always pass the whole string to the
! ;; application. But \r's should be replaced:
! ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
! (while (string-match "\r\n?" string pos)
! (setq string (replace-match "\n" t t string))
! (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
***************
*** 280,307 ****
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! name)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[[:space:]]*=[[:space:]]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (unless (looking-at "\"\\([^\"]*\\)\"")
! (unless (looking-at "'\\([^']*\\)'")
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! (push (cons name (match-string-no-properties 1)) attlist)
! (goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region"))
! )
(nreverse attlist)))
;;*******************************************************************
--- 282,325 ----
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! start-pos name)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n\r]*=[ \t\n\r]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
+ ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
+
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (if (looking-at "\"\\([^\"]*\\)\"")
! (setq start-pos (match-beginning 0))
! (if (looking-at "'\\([^']*\\)")
! (setq start-pos (match-beginning 0))
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! ;; Multiple whitespace characters should be replaced with a single one
! ;; in the attributes
! (let ((string (match-string-no-properties 1))
! (pos 0))
! (while (string-match "[ \t\n\r]+" string pos)
! (setq string (replace-match " " t nil string))
! (setq pos (1+ (match-beginning 0))))
! (push (cons name (xml-substitute-special string)) attlist))
!
! (goto-char start-pos)
! (if (looking-at "\"\\([^\"]*\\)\"")
! (goto-char (match-end 0))
! (if (looking-at "'\\([^']*\\)")
! (goto-char (match-end 0))))
!
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region")))
(nreverse attlist)))
;;*******************************************************************
***************
*** 318,332 ****
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[[:space:]]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
! (forward-word 1) ;; name of the document
! (goto-char (- (re-search-forward "[[:space:]]") 1))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][[:space:]]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
--- 336,350 ----
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[ \t\n\r]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
! (forward-word 1)
! (goto-char (- (re-search-forward "[ \t\n\r]") 1))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][ \t\n\r]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
***************
*** 334,340 ****
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
--- 352,358 ----
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
***************
*** 344,367 ****
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
! ;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
(error "XML: Don't know how to handle external DTDs"))
(if (not (= (char-after) ?\[))
(error "XML: Unknown declaration in the DTD"))
! ;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[[:space:]]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[[:space:]]*<!ELEMENT[[:space:]]+\\([a-zA-Z0-9.%;]+\\)[[:space:]]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
--- 362,385 ----
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
! ;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
(error "XML: Don't know how to handle external DTDs"))
(if (not (= (char-after) ?\[))
(error "XML: Unknown declaration in the DTD"))
! ;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[ \t\n\r]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[ \t\n\r]*<!ELEMENT[ \t\n\r]+\\([a-zA-Z0-9.%;]+\\)[ \t\n\r]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
***************
*** 369,381 ****
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
--- 387,399 ----
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
***************
*** 417,424 ****
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string)
! (setq elem (match-string 1 string)
modifier (match-string 2 string))))
(if (and (stringp elem) (string= elem "#PCDATA"))
--- 435,442 ----
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
! (setq elem (match-string 1 string)
modifier (match-string 2 string))))
(if (and (stringp elem) (string= elem "#PCDATA"))
***************
*** 434,439 ****
--- 452,473 ----
(t
elem))))
+ ;;*******************************************************************
+ ;;**
+ ;;** Converting code points to strings
+ ;;**
+ ;;*******************************************************************
+
+ (defun xml-ucs-to-string (codepoint)
+   "Return the character for Unicode CODEPOINT as a one-character string.
+ Return \"?\" if CODEPOINT cannot be converted."
+   (cond ((and (fboundp 'decode-char) (decode-char 'ucs codepoint)) ; a function, so `fboundp'; nil = no such char
+          (char-to-string (decode-char 'ucs codepoint)))
+         ((and (< codepoint 128) ; without a usable `decode-char', accept printable ASCII only
+               (> codepoint 31))
+          (char-to-string codepoint))
+         (t "?"))) ; FIXME: There's gotta be a better way to
+ ; designate an unknown character.
;;*******************************************************************
;;**
***************
*** 451,456 ****
--- 485,501 ----
(setq string (replace-match "'" t nil string)))
(while (string-match """ string)
(setq string (replace-match "\"" t nil string)))
+ (while (string-match "&#\\([0-9]+\\);" string)
+ (setq string (replace-match (xml-ucs-to-string
+ (string-to-int
+ (match-string-no-properties 1 string)))
+ t nil string)))
+ (while (string-match "&#x\\([0-9a-fA-F]+\\);" string)
+ (setq string (replace-match (xml-ucs-to-string
+ (hex-string-to-number
+ (match-string-no-properties 1 string)))
+ t nil string)))
+
;; This goes last so it doesn't confuse the matches above.
(while (string-match "&amp;" string)
(setq string (replace-match "&" t nil string)))
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: xml.el patches for better spec compliance
2003-03-14 17:56 ` Mark A. Hershberger
@ 2003-03-14 23:51 ` Juanma Barranquero
0 siblings, 0 replies; 7+ messages in thread
From: Juanma Barranquero @ 2003-03-14 23:51 UTC (permalink / raw)
Cc: bug-gnu-emacs
On Fri, 14 Mar 2003 11:56:21 -0600, mah@everybody.org (Mark A. Hershberger) wrote:
> + (while (string-match "&#\\([0-9]+\\);" string)
> + (setq string (replace-match (xml-ucs-to-string
> + (string-to-int
> + (match-string-no-properties 1 string)))
> + t nil string)))
> + (while (string-match "&#x\\([0-9a-fA-F]+\\);" string)
> + (setq string (replace-match (xml-ucs-to-string
> + (hex-string-to-number
> + (match-string-no-properties 1 string)))
> + t nil string)))
> +
hex-string-to-number still doesn't exist, but you can use
(string-to-number
(match-string-no-properties 1 string)
16)
/L/e/k/t/u
^ permalink raw reply [flat|nested] 7+ messages in thread
* xml.el patches for better spec compliance
@ 2003-03-12 19:42 Mark A. Hershberger
0 siblings, 0 replies; 7+ messages in thread
From: Mark A. Hershberger @ 2003-03-12 19:42 UTC (permalink / raw)
The following patch contains the following changes to xml.el:
* All instances of "[:space:]" changed to " \t\n\r". This is
because the whitespace class does not include \r, but the XML
spec includes \r as whitespace. (I had previously submitted
the patch to change " \t\n" to "[:space:]", but I was wrong.)
See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-common-syn>.
* Replace "\r\n" and "\r" with "\n". See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends>.
* Added attribute normalization. See
<http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize>.
* Added character references. See
<http://www.w3.org/TR/2000/REC-xml-20001006#sec-references>.
diff -c -r1.16 xml.el
*** xml.el 11 Mar 2003 21:57:46 -0000 1.16
--- xml.el 12 Mar 2003 19:36:01 -0000
***************
*** 184,190 ****
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
--- 184,190 ----
;; beginning of a document)
((looking-at "<\\?")
(search-forward "?>" end)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(xml-parse-tag end))
;; Character data (CDATA) sections, in which no tag should be interpreted
((looking-at "<!\\[CDATA\\[")
***************
*** 198,204 ****
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
--- 198,204 ----
(if parse-dtd
(setq dtd (xml-parse-dtd end))
(xml-skip-dtd end))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if dtd
(cons dtd (xml-parse-tag end))
(xml-parse-tag end))))
***************
*** 210,216 ****
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/>[:space:]]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
--- 210,216 ----
((looking-at "</")
'())
;; opening tag
! ((looking-at "<\\([^/> \t\n\r]+\\)")
(goto-char (match-end 1))
(let* ((case-fold-search nil) ;; XML is case-sensitive.
(node-name (match-string 1))
***************
*** 219,225 ****
pos)
;; is this an empty element ?
! (if (looking-at "/[[:space:]]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
--- 219,225 ----
pos)
;; is this an empty element ?
! (if (looking-at "/[ \t\n\r]*>")
(progn
(forward-char 2)
(nreverse (cons '("") children)))
***************
*** 230,236 ****
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[[:space:]]*>")))
(cond
((looking-at "</")
(error (concat
--- 230,236 ----
(forward-char 1)
;; Now check that we have the right end-tag. Note that this
;; one might contain spaces after the tag name
! (while (not (looking-at (concat "</" node-name "[ \t\n\r]*>")))
(cond
((looking-at "</")
(error (concat
***************
*** 248,259 ****
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string (no newline characters)
! ;; Not done, since as per XML specifications, the XML processor
! ;; should always pass the whole string to the application.
! ;; (while (string-match "\\s +" string pos)
! ;; (setq string (replace-match " " t t string))
! ;; (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
--- 248,261 ----
(let ((string (buffer-substring-no-properties pos (point)))
(pos 0))
! ;; Clean up the string. As per XML
! ;; specifications, the XML processor should
! ;; always pass the whole string to the
! ;; application. But \r's should be replaced:
! ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
! (while (string-match "\r\n?" string pos)
! (setq string (replace-match "\n" t t string))
! (setq pos (1+ (match-beginning 0))))
(setq string (xml-substitute-special string))
(setq children
***************
*** 280,307 ****
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! name)
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[[:space:]]*=[[:space:]]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (unless (looking-at "\"\\([^\"]*\\)\"")
! (unless (looking-at "'\\([^']*\\)'")
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! (push (cons name (match-string-no-properties 1)) attlist)
! (goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region"))
! )
(nreverse attlist)))
;;*******************************************************************
--- 282,324 ----
The search for attributes end at the position END in the current buffer.
Leaves the point on the first non-blank character after the tag."
(let ((attlist ())
! start-pos name)
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
! (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n\r]*=[ \t\n\r]*")
(setq name (intern (match-string 1)))
(goto-char (match-end 0))
+ ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
+
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
! (if (looking-at "\"\\([^\"]*\\)\"")
! (setq start-pos (match-beginning 0))
! (if (looking-at "'\\([^']*\\)")
! (setq start-pos (match-beginning 0))
(error "XML: Attribute values must be given between quotes")))
;; Each attribute must be unique within a given element
(if (assoc name attlist)
(error "XML: each attribute must be unique within an element"))
! ;; Multiple whitespace characters should be replaced with a single one
! ;; in the attributes
! (let ((string (match-string-no-properties 1))
! (pos 0))
! (while (string-match "[ \t\n\r]+" string pos)
! (setq string (replace-match " " t nil string))
! (setq pos (1+ (match-beginning 0))))
! (push (cons name (xml-substitute-special string)) attlist))
!
! (goto-char start-pos)
! (if (looking-at "\"\\([^\"]*\\)\"")
! (goto-char (match-end 0))
! (if (looking-at "'\\([^']*\\)")
! (goto-char (match-end 0))))
!
(if (> (point) end)
! (error "XML: end of attribute list not found before end of region")))
(nreverse attlist)))
;;*******************************************************************
***************
*** 318,332 ****
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[[:space:]]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
(forward-word 1) ;; name of the document
! (goto-char (- (re-search-forward "[[:space:]]") 1))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][[:space:]]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
--- 335,349 ----
The point must be just before the starting tag of the DTD.
This follows the rule [28] in the XML specifications."
(forward-char (length "<!DOCTYPE"))
! (if (looking-at "[ \t\n\r]*>")
(error "XML: invalid DTD (excepting name of the document)"))
(condition-case nil
(progn
(forward-word 1) ;; name of the document
! (goto-char (- (re-search-forward "[ \t\n\r]") 1))
! (goto-char (- (re-search-forward "[^\t\n\r]") 1))
(if (looking-at "\\[")
! (re-search-forward "\\][ \t\n\r]*>" end)
(search-forward ">" end)))
(error (error "XML: No end to the DTD"))))
***************
*** 334,340 ****
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
--- 351,357 ----
"Parse the DTD that point is looking at.
The DTD must end before the position END in the current buffer."
(forward-char (length "<!DOCTYPE"))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
(if (looking-at ">")
(error "XML: invalid DTD (excepting name of the document)"))
***************
*** 344,350 ****
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^[:space:]]") 1))
;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
--- 361,367 ----
type element end-pos)
(goto-char (match-end 0))
! (goto-char (- (re-search-forward "[^ \t\n\r]") 1))
;; External DTDs => don't know how to handle them yet
(if (looking-at "SYSTEM")
***************
*** 355,367 ****
;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[[:space:]]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[[:space:]]*<!ELEMENT[[:space:]]+\\([a-zA-Z0-9.%;]+\\)[[:space:]]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
--- 372,384 ----
;; Parse the rest of the DTD
(forward-char 1)
! (while (and (not (looking-at "[ \t\n\r]*\\]"))
(<= (point) end))
(cond
;; Translation of rule [45] of XML specifications
((looking-at
! "[ \t\n\r]*<!ELEMENT[ \t\n\r]+\\([a-zA-Z0-9.%;]+\\)[ \t\n\r]+\\([^>]+\\)>")
(setq element (intern (match-string-no-properties 1))
type (match-string-no-properties 2))
***************
*** 369,381 ****
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
--- 386,398 ----
;; Translation of rule [46] of XML specifications
(cond
! ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
(setq type 'empty))
! ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
(setq type 'any))
! ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
(setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
! ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
(error "XML: Invalid element type in the DTD")))
***************
*** 417,423 ****
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string)
(setq elem (match-string 1 string)
modifier (match-string 2 string))))
--- 434,440 ----
(mapcar 'xml-parse-elem-type
(split-string elem ","))))
)))
! (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
(setq elem (match-string 1 string)
modifier (match-string 2 string))))
***************
*** 451,456 ****
--- 468,484 ----
(setq string (replace-match "'" t nil string)))
(while (string-match "&quot;" string)
(setq string (replace-match "\"" t nil string)))
+ (while (string-match "&#\\([0-9]+\\);" string)
+ (setq string (replace-match (char-to-string
+ (string-to-int
+ (match-string-no-properties 1 string)))
+ t nil string)))
+ (while (string-match "&#x\\([0-9a-fA-F]+\\);" string)
+ (setq string (replace-match (char-to-string
+ (hex-string-to-number
+ (match-string-no-properties 1 string)))
+ t nil string)))
+
;; This goes last so it doesn't confuse the matches above.
(while (string-match "&amp;" string)
(setq string (replace-match "&" t nil string)))
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2003-03-14 23:51 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-03-12 21:04 xml.el patches for better spec compliance Mark A. Hershberger
2003-03-13 10:31 ` Juanma Barranquero
[not found] ` <mailman.3159.1047551561.21513.bug-gnu-emacs@gnu.org>
2003-03-13 17:03 ` Mark A. Hershberger
2003-03-14 17:03 ` Mark A. Hershberger
[not found] <mailman.3142.1047498362.21513.bug-gnu-emacs@gnu.org>
2003-03-14 17:56 ` Mark A. Hershberger
2003-03-14 23:51 ` Juanma Barranquero
-- strict thread matches above, loose matches on Subject: below --
2003-03-12 19:42 Mark A. Hershberger
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.