From mboxrd@z Thu Jan 1 00:00:00 1970 Path: main.gmane.org!not-for-mail From: mah@everybody.org (Mark A. Hershberger) Newsgroups: gmane.emacs.bugs Subject: Update namespace parsing in xml.el Date: Wed, 22 Oct 2003 00:36:07 -0500 Sender: bug-gnu-emacs-bounces+geb-bug-gnu-emacs=m.gmane.org@gnu.org Message-ID: <8765ihrfzs.fsf@weblog.localhost> NNTP-Posting-Host: deer.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Trace: sea.gmane.org 1066797081 26046 80.91.224.253 (22 Oct 2003 04:31:21 GMT) X-Complaints-To: usenet@sea.gmane.org NNTP-Posting-Date: Wed, 22 Oct 2003 04:31:21 +0000 (UTC) Original-X-From: bug-gnu-emacs-bounces+geb-bug-gnu-emacs=m.gmane.org@gnu.org Wed Oct 22 06:31:19 2003 Return-path: Original-Received: from monty-python.gnu.org ([199.232.76.173]) by deer.gmane.org with esmtp (Exim 3.35 #1 (Debian)) id 1ACAeU-0005sd-01 for ; Wed, 22 Oct 2003 06:31:18 +0200 Original-Received: from localhost ([127.0.0.1] helo=monty-python.gnu.org) by monty-python.gnu.org with esmtp (Exim 4.24) id 1ACAbp-0000U4-36 for geb-bug-gnu-emacs@m.gmane.org; Wed, 22 Oct 2003 00:28:33 -0400 Original-Received: from list by monty-python.gnu.org with tmda-scanned (Exim 4.24) id 1ACAbl-0000Tu-MM for bug-gnu-emacs@gnu.org; Wed, 22 Oct 2003 00:28:29 -0400 Original-Received: from mail by monty-python.gnu.org with spam-scanned (Exim 4.24) id 1ACAbF-0000Sk-IG for bug-gnu-emacs@gnu.org; Wed, 22 Oct 2003 00:28:28 -0400 Original-Received: from [80.91.224.249] (helo=main.gmane.org) by monty-python.gnu.org with esmtp (Exim 4.24) id 1ACAbF-0000SO-0Q for bug-gnu-emacs@gnu.org; Wed, 22 Oct 2003 00:27:57 -0400 Original-Received: from list by main.gmane.org with local (Exim 3.35 #1 (Debian)) id 1ACAbB-00059q-00 for ; Wed, 22 Oct 2003 06:27:53 +0200 X-Injected-Via-Gmane: http://gmane.org/ Original-To: bug-gnu-emacs@gnu.org Original-Received: from sea.gmane.org ([80.91.224.252]) by main.gmane.org with esmtp (Exim 3.35 #1 (Debian)) id 1ACAbA-00059i-00 for ; Wed, 22 Oct 2003 06:27:52 +0200 Original-Received: from news by sea.gmane.org with local (Exim 3.35 #1 (Debian)) id 1ACAbA-0006fu-00 for ; Wed, 22 Oct 2003 06:27:52 +0200 Original-Lines: 239 Original-X-Complaints-To: usenet@sea.gmane.org X-URL: http://mah.everybody.org/weblog/ User-Agent: Gnus/5.1003 (Gnus v5.10.3) Emacs/21.3.50 (gnu/linux) Cancel-Lock: sha1:N4I9oX7AFuzZJ3VvSfHgAyqtJ/o= X-BeenThere: bug-gnu-emacs@gnu.org X-Mailman-Version: 2.1.2 Precedence: list List-Id: Bug reports for GNU Emacs, the Swiss army knife of text editors List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: bug-gnu-emacs-bounces+geb-bug-gnu-emacs=m.gmane.org@gnu.org Xref: main.gmane.org gmane.emacs.bugs:6016 X-Report-Spam: http://spam.gmane.org/gmane.emacs.bugs:6016 2003-10-22 Mark A. Hershberger * xml.el: Allow comments following the top-level element. Separate out namespace parsing into special functions. Change namespace parsing to return ('ns-uri . "local-name") instead of '{ns-uri}local-name. diff -u -w -r1.24 xml.el --- xml.el 1 Sep 2003 15:45:18 -0000 1.24 +++ xml.el 22 Oct 2003 04:13:11 -0000 @@ -208,13 +208,14 @@ (if (search-forward "<" nil t) (progn (forward-char -1) - (if xml + (setq result (xml-parse-tag parse-dtd parse-ns)) + (if (and xml result) ;; translation of rule [1] of XML specifications (error "XML files can have only one toplevel tag") - (setq result (xml-parse-tag parse-dtd parse-ns)) (cond ((null result)) - ((listp (car result)) + ((and (listp (car result)) + parse-dtd) (setq dtd (car result)) (if (cdr result) ; possible leading comment (add-to-list 'xml (cdr result)))) @@ -225,6 +226,73 @@ (cons dtd (nreverse xml)) (nreverse xml))))))) +(defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns) + "Parse the namespace attributes and return a list of cons in the form: +\(namespace . prefix)" + + (mapcar + (lambda (attr) + (let* ((splitup (split-string (car attr) ":")) + (prefix (nth 0 splitup)) + (lname (nth 1 splitup))) + (when (string= "xmlns" prefix) + (push (cons (if lname + lname + "") + (cdr attr)) + xml-ns)))) attr-list) + xml-ns) + +;; expand element names +(defun xml-ns-expand-el (el xml-ns) + "Expand the XML elements from \"prefix:local-name\" to a cons in the form +\"(namespace . local-name)\"." + + (let* ((splitup (split-string el ":")) + (lname (or (nth 1 splitup) + (nth 0 splitup))) + (prefix (if (nth 1 splitup) + (nth 0 splitup) + (if (string= lname "xmlns") + "xmlns" + ""))) + (ns (cdr (assoc-string prefix xml-ns)))) + (if (string= "" ns) + lname + (cons (intern (concat ":" ns)) + lname)))) + +;; expand attribute names +(defun xml-ns-expand-attr (attr-list xml-ns) + "Expand the attribute list for a particular element from the form +\"prefix:local-name\" to the form \"{namespace}:local-name\"." + + (mapcar + (lambda (attr) + (let* ((splitup (split-string (car attr) ":")) + (lname (or (nth 1 splitup) + (nth 0 splitup))) + (prefix (if (nth 1 splitup) + (nth 0 splitup) + (if (string= (car attr) "xmlns") + "xmlns" + ""))) + (ns (cdr (assoc-string prefix xml-ns)))) + (setcar attr + (if (string= "" ns) + lname + (cons (intern (concat ":" ns)) + lname))))) + attr-list) + attr-list) + + +(defun xml-intern-attrlist (attr-list) + "Convert attribute names to symbols for backward compatibility." + (mapcar (lambda (attr) + (setcar attr (intern (car attr)))) + attr-list) + attr-list) (defun xml-parse-tag (&optional parse-dtd parse-ns) "Parse the tag at point. @@ -276,53 +344,22 @@ ;; opening tag ((looking-at "<\\([^/>[:space:]]+\\)") (goto-char (match-end 1)) + + ;; Parse this node (let* ((node-name (match-string 1)) - ;; Parse the attribute list. - (children (list (xml-parse-attlist) (intern node-name))) + (attr-list (xml-parse-attlist)) + (children (if (consp xml-ns) ;; take care of namespace parsing + (progn + (setq xml-ns (xml-ns-parse-ns-attrs + attr-list xml-ns)) + (list (xml-ns-expand-attr + attr-list xml-ns) + (xml-ns-expand-el + node-name xml-ns))) + (list (xml-intern-attrlist attr-list) + (intern node-name)))) pos) - ;; add the xmlns:* attrs to our cache - (when (consp xml-ns) - (mapcar - (lambda (attr) - (let* ((splitup (split-string (symbol-name (car attr)) ":")) - (prefix (nth 0 splitup)) - (lname (nth 1 splitup))) - (when (string= "xmlns" prefix) - (setq xml-ns (append (list (cons (if lname - lname - "") - (cdr attr))) - xml-ns))))) - (car children)) - - ;; expand element names - (let* ((splitup (split-string (symbol-name (cadr children)) ":")) - (lname (or (nth 1 splitup) - (nth 0 splitup))) - (prefix (if (nth 1 splitup) - (nth 0 splitup) - ""))) - (setcdr children (list - (intern (concat "{" - (cdr (assoc-string prefix xml-ns)) - "}" lname))))) - - ;; expand attribute names - (mapcar - (lambda (attr) - (let* ((splitup (split-string (symbol-name (car attr)) ":")) - (lname (or (nth 1 splitup) - (nth 0 splitup))) - (prefix (if (nth 1 splitup) - (nth 0 splitup) - (caar xml-ns)))) - - (setcar attr (intern (concat "{" - (cdr (assoc-string prefix xml-ns)) - "}" lname))))) - (car children))) - ;; is this an empty element ? (if (looking-at "/>") (progn @@ -377,13 +414,14 @@ (error "XML: Invalid character"))))) (defun xml-parse-attlist () - "Return the attribute-list after point.Leave point at the first non-blank character after the tag." + "Return the attribute-list after point. Leave point at the +first non-blank character after the tag." (let ((attlist ()) - start-pos name) + end-pos name) (skip-syntax-forward " ") (while (looking-at (eval-when-compile (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) - (setq name (intern (match-string 1))) + (setq name (match-string 1)) (goto-char (match-end 0)) ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize @@ -391,9 +429,9 @@ ;; Do we have a string between quotes (or double-quotes), ;; or a simple word ? (if (looking-at "\"\\([^\"]*\\)\"") - (setq start-pos (match-beginning 0)) + (setq end-pos (match-end 0)) (if (looking-at "'\\([^']*\\)'") - (setq start-pos (match-beginning 0)) + (setq end-pos (match-end 0)) (error "XML: Attribute values must be given between quotes"))) ;; Each attribute must be unique within a given element @@ -407,9 +445,7 @@ (replace-regexp-in-string "\\s-\\{2,\\}" " " string) (push (cons name (xml-substitute-special string)) attlist)) - (goto-char start-pos) - (forward-sexp) ; we have string syntax - + (goto-char end-pos) (skip-syntax-forward " ")) (nreverse attlist))) @@ -490,7 +526,7 @@ ((looking-at "]+\\)>") - (setq element (intern (match-string 1)) + (setq element (match-string 1) type (match-string-no-properties 2)) (setq end-pos (match-end 0)) @@ -510,7 +546,7 @@ ;; rule [45]: the element declaration must be unique (if (assoc element dtd) (error "XML: element declarations must be unique in a DTD (<%s>)" - (symbol-name element))) + element) ;; Store the element in the DTD (push (list element type) dtd) @@ -523,8 +559,7 @@ ;; Skip the end of the DTD (search-forward ">")))) - (nreverse dtd))) - + (nreverse dtd)))) (defun xml-parse-elem-type (string) "Convert element type STRING into a Lisp structure."