unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: mah@everybody.org (Mark A. Hershberger)
Cc: emacs-pretest-bug@gnu.org, emacs-devel@gnu.org
Subject: Re: Refactoring xml.el namespace handling
Date: Wed, 03 Mar 2004 00:35:42 -0600	[thread overview]
Message-ID: <878yiimo8x.fsf@weblog.localhost> (raw)
In-Reply-To: <jwveksa9aud.fsf-monnier+emacs/devel@asado.iro.umontreal.ca> (Stefan Monnier's message of "02 Mar 2004 16:52:19 -0500")

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> Could you send a patch relative to the CVS head version?
> Your patch does not apply cleanly to the current code.

Argh.  My CVS skillz are lacking.

Now that I've gotten a good HEAD, here is a fresh diff.

2004-03-03  Mark A. Hershberger  <mah@everybody.org>

	* xml.el (xml-maybe-do-ns): new function to handle namespace
	parsing of both attribute and element names.
	(xml-ns-parse-ns-attrs, xml-ns-expand-el, xml-ns-expand-attr,
	xml-intern-attrlist): Removed in favor of xml-maybe-do-ns to avoid
	un-necessary intern-ing.
	(xml-parse-tag): Updated assumed namespaces.  Cleaned up namespace
	parsing.
	(xml-parse-attlist): Now does its own namespace parsing work.
	(xml-parse-dtd): Updated <!ELEMENT parsing to accept elements with
	hyphens.  Now skips ENTITY, ATTLIST, and NOTATION instead of
	barfing.


--- xml.el	2 Mar 2004 21:45:06 -0000	1.30
+++ xml.el	3 Mar 2004 06:17:00 -0000
@@ -52,15 +52,15 @@
 
 ;;; LIST FORMAT
 
-;; The functions `xml-parse-file' and `xml-parse-tag' return a list with
-;; the following format:
+;; The functions `xml-parse-file', `xml-parse-region' and
+;; `xml-parse-tag' return a list with the following format:
 ;;
 ;;    xml-list   ::= (node node ...)
-;;    node       ::= (tag_name attribute-list . child_node_list)
+;;    node       ::= (qname attribute-list . child_node_list)
 ;;    child_node_list ::= child_node child_node ...
 ;;    child_node ::= node | string
-;;    tag_name   ::= string
-;;    attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...)
+;;    qname      ::= (:namespace-uri . "name") | "name"
+;;    attribute_list ::= ((qname . "value") (qname . "value") ...)
 ;;                       | nil
 ;;    string     ::= "..."
 ;;
@@ -68,6 +68,11 @@
 ;; Whitespace is preserved.  Fixme: There should be a tree-walker that
 ;; can remove it.
 
+;; TODO:
+;;  * xml:base, xml:space support
+;;  * more complete DOCTYPE parsing
+;;  * pi support
+
 ;;; Code:
 
 ;; Note that {buffer-substring,match-string}-no-properties were
@@ -230,72 +235,37 @@
 	      (cons dtd (nreverse xml))
 	    (nreverse xml)))))))
 
-(defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns)
-  "Parse the namespace attributes and return a list of cons in the form:
-\(namespace . prefix)"
-
-  (mapcar
-   (lambda (attr)
-     (let* ((splitup (split-string (car attr) ":"))
-	    (prefix (nth 0 splitup))
-	    (lname (nth 1 splitup)))
-       (when (string= "xmlns" prefix)
-	 (push (cons (if lname
-			 lname
-		       "")
-		     (cdr attr))
-	       xml-ns)))) attr-list)
-  xml-ns)
-
-;; expand element names
-(defun xml-ns-expand-el (el xml-ns)
-  "Expand the XML elements from \"prefix:local-name\" to a cons in the form
-\"(namespace . local-name)\"."
-
-  (let* ((splitup (split-string el ":"))
-	 (lname (or (nth 1 splitup)
-		    (nth 0 splitup)))
-	 (prefix (if (nth 1 splitup)
-		     (nth 0 splitup)
-		   (if (string= lname "xmlns")
-		       "xmlns"
-		     "")))
-	 (ns (cdr (assoc-string prefix xml-ns))))
-    (if (string= "" ns)
-	lname
-      (cons (intern (concat ":" ns))
-	    lname))))
-
-;; expand attribute names
-(defun xml-ns-expand-attr (attr-list xml-ns)
-  "Expand the attribute list for a particular element from the form
-\"prefix:local-name\" to the form \"{namespace}:local-name\"."
-
-  (mapcar
-   (lambda (attr)
-     (let* ((splitup (split-string (car attr) ":"))
+(defun xml-maybe-do-ns (name default xml-ns)
+  "Perform any namespace expansion.  NAME is the name to perform the expansion on.
+DEFAULT is the default namespace.  XML-NS is a cons of namespace
+names to uris.  When namespace-aware parsing is off, then XML-NS
+is nil.
+
+During namespace-aware parsing, any name without a namespace is
+put into the namespace identified by DEFAULT.  nil is used to
+specify that the name shouldn't be given a namespace."
+  (if (consp xml-ns)
+      (let* ((splitup (split-string name ":"))
 	    (lname (or (nth 1 splitup)
 		       (nth 0 splitup)))
 	    (prefix (if (nth 1 splitup)
 			(nth 0 splitup)
-		      (if (string= (car attr) "xmlns")
-			  "xmlns"
-			"")))
-	    (ns (cdr (assoc-string prefix xml-ns))))
-       (setcar attr
-	       (if (string= "" ns)
-		   lname
-		 (cons (intern (concat ":" ns))
-		       lname)))))
-   attr-list)
-  attr-list)
-
-(defun xml-intern-attrlist (attr-list)
-  "Convert attribute names to symbols for backward compatibility."
-  (mapcar (lambda (attr)
-	    (setcar attr (intern (car attr))))
-	  attr-list)
-  attr-list)
+                       default))
+             (ns  (progn 
+                     (if (and 
+                          (string-equal lname "xmlns")
+                          (not prefix))
+                         (cdr (assoc "xmlns" xml-ns))
+                       (cdr (assoc prefix xml-ns))))))
+        (if ns
+             (cons ns
+                   (if (and
+                        (string-equal lname "xmlns")
+                        (not prefix))
+                       ""
+                     lname))
+           lname))
+    (intern name)))
 
 (defun xml-parse-tag (&optional parse-dtd parse-ns)
   "Parse the tag at point.
@@ -308,12 +278,12 @@
  - a pair : the first element is the DTD, the second is the node."
   (let ((xml-ns (if (consp parse-ns)
 		    parse-ns
-		  (if parse-ns
+		  (when parse-ns
 		      (list
-		       ;; Default no namespace
-		       (cons "" "")
+		       ;; "xml" namespace
+		       (cons "xml"   :http://www.w3.org/XML/1998/namespace)
 		       ;; We need to seed the xmlns namespace
-		       (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
+		       (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
     (cond
      ;; Processing instructions (like the <?xml version="1.0"?> tag at the
      ;; beginning of a document).
@@ -350,19 +320,25 @@
 
       ;; Parse this node
       (let* ((node-name (match-string 1))
-	     (attr-list (xml-parse-attlist))
-	     (children (if  (consp xml-ns) ;; take care of namespace parsing
-			    (progn
-			      (setq xml-ns (xml-ns-parse-ns-attrs
-					    attr-list xml-ns))
-			      (list (xml-ns-expand-attr
-				     attr-list xml-ns)
-				    (xml-ns-expand-el
-				     node-name xml-ns)))
-			    (list (xml-intern-attrlist attr-list)
-				  (intern node-name))))
+             ;; Parse the attribute list.
+             (children (list (xml-parse-attlist xml-ns) node-name))
 	     pos)
 
+        ;; add the xmlns:* attrs to our cache
+        (when (consp xml-ns)
+          (mapcar
+           (lambda (attr)
+             (when (and (listp (car attr))
+                        (eq :http://www.w3.org/2000/xmlns/
+                            (caar attr)))
+               (setq xml-ns (append (list (cons (cdar attr)
+                                                (intern (concat ":" (cdr attr)))))
+                                    xml-ns))))
+           (car children)))
+
+        ;; expand element names
+        (setcdr children (list (xml-maybe-do-ns (cadr children) "" xml-ns)))
+
 	;; is this an empty element ?
 	(if (looking-at "/>")
 	(progn
@@ -416,7 +392,7 @@
      (t	;; This is not a tag.
       (error "XML: Invalid character")))))
 
-(defun xml-parse-attlist ()
+(defun xml-parse-attlist (&optional xml-ns)
   "Return the attribute-list after point.  Leave point at the
 first non-blank character after the tag."
   (let ((attlist ())
@@ -424,8 +400,9 @@
     (skip-syntax-forward " ")
     (while (looking-at (eval-when-compile
 			 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
-      (setq name (match-string 1))
-      (goto-char (match-end 0))
+      (setq end-pos (match-end 0))
+      (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
+      (goto-char end-pos)
 
       ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
 
@@ -527,7 +504,7 @@
 
 	   ;;  Translation of rule [45] of XML specifications
 	   ((looking-at
-	     "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+	     "<!ELEMENT\\s-+\\([[:alnum:].%;-]+\\)\\s-+\\([^>]+\\)>")
 
 	    (setq element (match-string 1)
 		  type    (match-string-no-properties 2))
@@ -556,7 +533,15 @@
 	    (goto-char end-pos))
 	   ((looking-at "<!--")
 	    (search-forward "-->"))
-
+           ((looking-at "<!ENTITY\\s-+\\([[:alnum:].%;-]+\\)\\s-+\\([^>]+\\)>")
+            ; Put the ENTITY in
+            (goto-char (match-end 0)))
+           ((looking-at "<!ATTLIST\\s+\\([[:alnum:].%;-]+\\)\\s+\\([^>]+\\)>")
+            ; Put in the ATTLIST
+            (goto-char (match-end 0)))
+           ((looking-at "<!NOTATION\\s+\\([[:alnum:].%;-]+\\)\\s+\\([^>]+\\)>")
+            ; Put in the NOTATION
+            (goto-char (match-end 0)))
 	   (t
 	    (error "XML: Invalid DTD item")))
 


-- 
A choice between one man and a shovel, or a dozen men with teaspoons
is clear to me, and I'm sure it is clear to you also.
    -- Zimran Ahmed <http://www.winterspeak.com/>

  reply	other threads:[~2004-03-03  6:35 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-03-01 15:59 Refactoring xml.el namespace handling Mark A. Hershberger
2004-03-02 21:52 ` Stefan Monnier
2004-03-03  6:35   ` Mark A. Hershberger [this message]
2004-03-04 17:50     ` Stefan Monnier
2004-03-04 17:52       ` Stefan Monnier
2004-03-08  1:33       ` Kim F. Storm
2004-03-08  4:06         ` Mark A. Hershberger
2004-03-12  9:57         ` Richard Stallman
  -- strict thread matches above, loose matches on Subject: below --
2004-03-05 19:03 Mark A. Hershberger
2004-03-05 19:29 ` Stefan Monnier
2004-04-14 18:36 ` Stefan Monnier
2004-04-16  3:14   ` Katsumi Yamaoka
2004-04-16 20:05     ` Mark A. Hershberger
2004-04-16 22:27       ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878yiimo8x.fsf@weblog.localhost \
    --to=mah@everybody.org \
    --cc=emacs-devel@gnu.org \
    --cc=emacs-pretest-bug@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).