unofficial mirror of help-gnu-emacs@gnu.org
 help / color / mirror / Atom feed
* use of "match-string"
@ 2011-03-08 17:31 ken
  2011-03-08 19:06 ` PJ Weisberg
  0 siblings, 1 reply; 5+ messages in thread
From: ken @ 2011-03-08 17:31 UTC (permalink / raw)
  To: GNU Emacs List


Part of this function doesn't make sense--

(defun html-toc-find-max ()
  (goto-char (point-min))
  (let ((max-toc 0))
    (while (search-forward-regexp html-toc-tocref nil t)
      (if (> (string-to-int (match-string 1)) max-toc)
          (setq max-toc (string-to-int (match-string 1)))))
    (1+ max-toc)))

-- specifically, where match-string is first called and turned into a
number.  The docs say that match-string returns a string....  Yes, this
can be done I suppose, but to what end?  Moreover, depending upon its
value, this "number" may then be assigned to a variable, and that value
then compared with subsequent strings.

Perhaps I'm missing some nuance here.  The entirety of the code is
below.  Does anyone understand what's going on here?

tia.


;;; html-toc.el creates a table-of-contents on a html-document

;; Copyright (c) 2001 Rolf Rander Næss

;; Author:   Rolf Rander Næss <rolfn@pvv.org>
;; Created:  17-Mar-2001
;; Version:  0.3
;; Keywords: html
;; X-URL:    http://www.pvv.org/~rolfn/html-toc.el

;; This is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.  This is distributed in the hope that it will be
;; useful, but without any warranty; without even the implied warranty
;; of merchantability or fitness for a particular purpose.  See the
;; GNU General Public License for more details.  You should have
;; received a copy of the GNU General Public License along with GNU
;; Emacs; see the file `COPYING'.  If not, write to the Free Software
;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;; 02111-1307, USA.

;; Brief users guide:
;;
;;   This package will create a table-of-contents in an HTML-document
;;   based on <H[123]> tags.  The toc will be placed between the
;;   strings defined by `html-toc-open' and `html-toc-close'.  If these
;;   don't exist, it will be placed right after <body>.  If no <body>-tag
;;   exists, it will be put right before the first header.

;; Known bugs:
;;
;; - strange things will happen if your headers aren't valid (lack a
;;   close-tag), but in this case your page will also look strange in a
;;   browser...

(defvar html-toc-head-open  "<[ \t\n]*[Hh]\\([1-3]\\)[^>]*>"
  "Regexp matching an opening H1, H2 or H3 tag.
Group 1 is the header level digit.")
(defvar html-toc-head-close "<[ \t\n]*/[ \t\n]*[Hh][1-3][ \t\n]*>"
  "Regexp matching a closing H1, H2 or H3 tag.")
(defvar html-toc-a-name     "<A NAME=\"\\([^\"]*\\)\"></A>"
  "Regexp matching an empty named anchor.
Group 1 is the anchor name.")
(defvar html-toc-open       "<!-- table of contents start -->"
  "Literal string (not a regexp) marking the start of the table of contents.")
(defvar html-toc-close      "<!-- table of contents end -->"
  "Literal string (not a regexp) marking the end of the table of contents.")
(defvar html-toc-body-open  "<[ \t\n]*[Bb][Oo][Dd][Yy][ \t\n]*>"
  "Regexp matching an attribute-less opening <body> tag.")
(defvar html-toc-list-open  "<UL>"
  "Text inserted to open one nesting level of the table of contents.")
(defvar html-toc-list-close "</UL>"
  "Text inserted to close one nesting level of the table of contents.")
(defvar html-toc-list-item  "<LI>"
  "Text inserted before each table-of-contents entry.")
(defvar html-toc-name-pre   "tocref"
  "Prefix of the anchor names generated for headers that have no anchor.")
(defvar html-toc-title      "Table of Contents"
  "Heading text written above the table of contents.")
;; The prefix is passed through `regexp-quote' so that a customized prefix
;; containing regexp-special characters is still matched literally, and
;; the number is `+' rather than `*' so that a bare prefix without any
;; digits is not mistaken for a generated anchor.
;; Note: this value is computed once, when the variable is first defined;
;; it does not follow later changes to `html-toc-name-pre'.
(defvar html-toc-tocref     (concat "<A NAME=\"" (regexp-quote html-toc-name-pre)
                                    "\\([0-9]+\\)\">")
  "Regexp matching the opening tag of a generated anchor.
Group 1 is the anchor's number.")

(defun html-toc-find-position ()
  "Move point to where the table of contents should be written.

If the `html-toc-open' marker is present, the text between it and the
`html-toc-close' marker is deleted (the close marker is inserted when
it is missing) and point is left on a fresh line right after the open
marker.

Otherwise a new pair of markers is inserted right after the <body>
tag, or right before the first header when there is no <body> tag, or
at the beginning of the buffer when there is neither.  Point is left
between the two markers."
  (goto-char (point-min))
  (if (search-forward html-toc-open nil t)
      (let ((toc-start (point)))
        (if (search-forward html-toc-close nil t)
            (delete-region toc-start (match-beginning 0))
          (insert html-toc-close))
        (goto-char toc-start)
        (insert "\n"))
    (goto-char (point-min))
    (unless (search-forward-regexp html-toc-body-open nil t)
      (goto-char (point-min))
      ;; Only consult the match data when the header search succeeded.
      ;; After a failed search `match-beginning' describes some earlier,
      ;; unrelated match (or nothing at all), so using it would move
      ;; point to an arbitrary place or signal an error.
      (when (search-forward-regexp html-toc-head-open nil t)
        (goto-char (match-beginning 0))))
    (insert html-toc-open "\n")
    ;; Remember the spot between the markers, then put point back there.
    (let ((p (point)))
      (insert html-toc-close "\n\n")
      (goto-char p))))


(defun html-toc-find-max ()
  "Return the next unused number for a generated anchor.

Scans the whole buffer for anchors matching `html-toc-tocref' and
returns one more than the largest number found, or 1 when there are
none.  The digits captured by group 1 are converted to a number so the
comparison is numeric rather than alphabetical.

Moves point; the caller is responsible for restoring it."
  (goto-char (point-min))
  (let ((max-toc 0))
    (while (search-forward-regexp html-toc-tocref nil t)
      ;; `string-to-number' replaces the obsolete `string-to-int'; an
      ;; empty capture converts to 0 and therefore never raises the max.
      (setq max-toc (max max-toc (string-to-number (match-string 1)))))
    (1+ max-toc)))

(defun html-toc-build ()
  "Collect the buffer's headers and return them as table-of-contents entries.

Returns a list, in buffer order, of entries (LEVEL NAME TEXT) where
LEVEL is the header level as a number, NAME is the name of the anchor
that follows the opening header tag, and TEXT is the buffer text
between that anchor and the closing header tag.

A header whose opening tag is not immediately followed by an anchor
matching `html-toc-a-name' gets a new anchor inserted, named
`html-toc-name-pre' followed by the next free number.  A header with no
closing tag is left out of the result.

Modifies the buffer and moves point."
  (let ((toc '())
        (toc-cnt (html-toc-find-max)))
    (goto-char (point-min))
    (while (search-forward-regexp html-toc-head-open nil t)
      (let* ((level (string-to-number (match-string 1)))
             (name (cond ((looking-at html-toc-a-name)
                          ;; Reuse the existing anchor and step over it.
                          (goto-char (match-end 0))
                          (match-string 1))
                         (t (let ((n (concat html-toc-name-pre
                                             (number-to-string toc-cnt))))
                              (insert "<A NAME=\"" n "\"></A>")
                              (setq toc-cnt (1+ toc-cnt))
                              n))))
             (head-start (point)))
        ;; Record the entry only when the closing tag was actually found;
        ;; after a failed search the match data still describes an
        ;; earlier match and would yield a bogus header text.
        (when (search-forward-regexp html-toc-head-close nil t)
          (setq toc (cons (list level
                                name
                                (buffer-substring head-start
                                                  (match-beginning 0)))
                          toc)))))
    (nreverse toc)))

(defun html-toc-aref (name text)
  "Return an HTML link showing TEXT that points at the anchor called NAME."
  (let ((open-tag (concat "<A HREF=\"#" name "\">")))
    (concat open-tag text "</A>")))

(defun html-toc-write-level (toc cur-level)
  "Insert at point the leading entries of TOC that sit at CUR-LEVEL or deeper.

TOC is a list of entries (LEVEL NAME TEXT).  Entries whose level equals
CUR-LEVEL are inserted as list items linking to NAME; a run of deeper
entries is wrapped in `html-toc-list-open' / `html-toc-list-close' and
written by a recursive call for the next level.

Returns the unconsumed tail of TOC: the sublist starting at the first
entry whose level is lower than CUR-LEVEL, or nil when TOC is used up."
  ;; Siblings are handled by this loop rather than by one recursive call
  ;; per entry: Emacs Lisp does not eliminate tail calls, so recursing
  ;; per entry could exceed `max-lisp-eval-depth' on long documents.
  ;; Recursion is now bounded by the nesting depth only.
  (let ((done nil))
    (while (and toc (not done))
      (let* ((entry (car toc))
             (level (car entry)))
        (cond ((> level cur-level)
               ;; Deeper entry: open a nested list and let the next
               ;; level consume it (the entry itself is not consumed here).
               (insert html-toc-list-open "\n")
               (setq toc (html-toc-write-level toc (1+ cur-level)))
               (insert html-toc-list-close "\n"))
              ((= level cur-level)
               (insert html-toc-list-item
                       (html-toc-aref (nth 1 entry) (nth 2 entry))
                       "\n")
               (setq toc (cdr toc)))
              (t
               ;; Shallower entry: it belongs to an enclosing level.
               (setq done t)))))
    toc))

(defun html-toc-write (toc)
  "Insert the table-of-contents heading and then the entries of TOC at point.
The heading is an H1 containing an anchor named \"toc\" followed by
`html-toc-title'.  The entries are written by `html-toc-write-level'
starting from level 0, and its result is returned."
  ;; Heading first, piece by piece: opening tag with anchor, title, close.
  (insert "<H1><A NAME=\"toc\"></A>")
  (insert html-toc-title)
  (insert "</H1>\n")
  (let ((top-level 0))
    (html-toc-write-level toc top-level)))

(defun html-toc ()
  "Create or refresh the table of contents of the HTML in the current buffer.

The location is chosen by `html-toc-find-position', the entries are
collected by `html-toc-build' and the result is written by
`html-toc-write'.  Point is restored afterwards."
  (interactive)
  (save-excursion
    (html-toc-find-position)
    ;; Keep the insertion point as a marker, not as a number:
    ;; `html-toc-build' inserts anchors into headers, and any header
    ;; located before the table of contents would shift a numeric
    ;; position so that the table ends up in the wrong place.
    (let ((toc-pos (point-marker)))
      (unwind-protect
          (let ((toc (html-toc-build)))
            (goto-char toc-pos)
            (html-toc-write toc))
        ;; Detach the marker so it no longer has to be updated on edits.
        (set-marker toc-pos nil)))))

;; Announce the `html-toc' feature so that (require 'html-toc) succeeds
;; once this file has been loaded.
(provide 'html-toc)



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: use of "match-string"
  2011-03-08 17:31 ken
@ 2011-03-08 19:06 ` PJ Weisberg
  2011-03-08 19:32   ` ken
  0 siblings, 1 reply; 5+ messages in thread
From: PJ Weisberg @ 2011-03-08 19:06 UTC (permalink / raw)
  To: GNU Emacs List

On Tue, Mar 8, 2011 at 9:31 AM, ken <gebser@mousecar.com> wrote:
>
> Part of this function doesn't make sense--
>
> (defun html-toc-find-max ()
>  (goto-char (point-min))
>  (let ((max-toc 0))
>    (while (search-forward-regexp html-toc-tocref nil t)
>      (if (> (string-to-int (match-string 1)) max-toc)
>          (setq max-toc (string-to-int (match-string 1)))))
>    (1+ max-toc)))
>
> -- specifically, where match-string is first called and turned into a
> number.  The docs say that match-string returns a string....  Yes, this
> can be done I suppose, but to what end?  Moreover, depending upon its
> value, this "number" may then be assigned to a variable, and that value
> then compared with subsequent strings.
>
> Perhaps I'm missing some nuance here.  The entirety of the code is
> below.  Does anyone understand what's going on here?

Maybe it would help if you looked at the value of the regexp that's
being matched.  The function html-toc-find-max does (almost) exactly
what it says it does: it finds the highest numbered section in the
table of contents, then returns that number plus one.

It's converting the strings to numbers because if it just compared the
strings alphabetically "25" would come before "3", etc.

-PJ



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: use of "match-string"
  2011-03-08 19:06 ` PJ Weisberg
@ 2011-03-08 19:32   ` ken
  0 siblings, 0 replies; 5+ messages in thread
From: ken @ 2011-03-08 19:32 UTC (permalink / raw)
  To: PJ Weisberg; +Cc: GNU Emacs List

Thanks, PJ.  You're right.  The parens are pulling out just the numbers
and so the comparison isn't looking at the whole string searched for.
Totally clear now.


On 03/08/2011 02:06 PM PJ Weisberg wrote:
> On Tue, Mar 8, 2011 at 9:31 AM, ken <gebser@mousecar.com> wrote:
>> Part of this function doesn't make sense--
>>
>> (defun html-toc-find-max ()
>>  (goto-char (point-min))
>>  (let ((max-toc 0))
>>    (while (search-forward-regexp html-toc-tocref nil t)
>>      (if (> (string-to-int (match-string 1)) max-toc)
>>          (setq max-toc (string-to-int (match-string 1)))))
>>    (1+ max-toc)))
>>
>> -- specifically, where match-string is first called and turned into a
>> number.  The docs say that match-string returns a string....  Yes, this
>> can be done I suppose, but to what end?  Moreover, depending upon its
>> value, this "number" may then be assigned to a variable, and that value
>> then compared with subsequent strings.
>>
>> Perhaps I'm missing some nuance here.  The entirety of the code is
>> below.  Does anyone understand what's going on here?
> 
> Maybe it would help if you looked at the value of the regexp that's
> being matched.  The function html-toc-find-max does (almost) exactly
> what it says it does: it finds the highest numbered section in the
> table of contents, then returns that number plus one.
> 
> It's converting the strings to numbers because if it just compared the
> strings alphabetically "25" would come before "3", etc.
> 
> -PJ
> 



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: use of "match-string"
       [not found] <mailman.2.1299605488.4046.help-gnu-emacs@gnu.org>
@ 2011-03-08 21:36 ` Tim X
  2011-03-08 23:48   ` ken
  0 siblings, 1 reply; 5+ messages in thread
From: Tim X @ 2011-03-08 21:36 UTC (permalink / raw)
  To: help-gnu-emacs

ken <gebser@mousecar.com> writes:

> Part of this function doesn't make sense--
>
> (defun html-toc-find-max ()
>   (goto-char (point-min))
>   (let ((max-toc 0))
>     (while (search-forward-regexp html-toc-tocref nil t)
>       (if (> (string-to-int (match-string 1)) max-toc)
>           (setq max-toc (string-to-int (match-string 1)))))
>     (1+ max-toc)))
>
> -- specifically, where match-string is first called and turned into a
> number.  The docs say that match-string returns a string....  Yes, this
> can be done I suppose, but to what end?  Moreover, depending upon its
> value, this "number" may then be assigned to a variable, and that value
> then compared with subsequent strings.
>
> Perhaps I'm missing some nuance here.  The entirety of the code is
> below.  Does anyone understand what's going on here?
>

I'm not sure what it is you find 'odd' about the above function. Apart
from the fact it should be using string-to-number (string-to-int has
been marked obsolete since 22.1), it seems reasonable to me. 

The regexp used in the match is 

> (defvar html-toc-tocref     (concat "<A NAME=\"" html-toc-name-pre
>                                     "\\([0-9]*\\)\">"))

Note the 1st (and only) grouping in the regexp i.e. \\([0-9]*\\), which
will match on 0 or more digits between 0..9. This is what (match-string
1) will return. (though as it is [0-9]* it could return 0 or more
digits, so match-string 1 could be "", which may be an issue).

The string-to-int call will return that value as a number rather than as
a string, which is then compared to max-toc (initially set to 0), not to
a string. The final value has 1 added to it. So, you're not comparing
strings, you are comparing strings of digits that have been converted to numbers.

So, this function would search through the buffer for the specified
regexp, extract the group of digits as a string, convert them to a
number and compare them to the last one found. If the number is larger,
it would set that as the max and then continue the loop. Finally, it
adds 1. 

I can see some things I would do differently and even if you don't find
a match, your max-toc value will have a value of at least 1, but apart
from that, it seems to do whatever it was intended to do. 

Tim


-- 
tcross (at) rapttech dot com dot au


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: use of "match-string"
  2011-03-08 21:36 ` use of "match-string" Tim X
@ 2011-03-08 23:48   ` ken
  0 siblings, 0 replies; 5+ messages in thread
From: ken @ 2011-03-08 23:48 UTC (permalink / raw)
  To: Tim X; +Cc: help-gnu-emacs

Thanks, Tim.  PJ pointed this out a couple hours before you.  You were
quick, but he was quicker.


On 03/08/2011 04:36 PM Tim X wrote:
> ken <gebser@mousecar.com> writes:
> 
>> Part of this function doesn't make sense--
>>
>> (defun html-toc-find-max ()
>>   (goto-char (point-min))
>>   (let ((max-toc 0))
>>     (while (search-forward-regexp html-toc-tocref nil t)
>>       (if (> (string-to-int (match-string 1)) max-toc)
>>           (setq max-toc (string-to-int (match-string 1)))))
>>     (1+ max-toc)))
>>
>> -- specifically, where match-string is first called and turned into a
>> number.  The docs say that match-string returns a string....  Yes, this
>> can be done I suppose, but to what end?  Moreover, depending upon its
>> value, this "number" may then be assigned to a variable, and that value
>> then compared with subsequent strings.
>>
>> Perhaps I'm missing some nuance here.  The entirety of the code is
>> below.  Does anyone understand what's going on here?
>>
> 
> I'm not sure what it is you find 'odd' about the above function. Apart
> from the fact it should be using string-to-number (string-to-int has
> been marked obsolete since 22.1), it seems reasonable to me. 
> 
> The regexp used in the match is 
> 
>> (defvar html-toc-tocref     (concat "<A NAME=\"" html-toc-name-pre
>>                                     "\\([0-9]*\\)\">"))
> 
> Note the 1st (and only) grouping in the regexp i.e. \\([0-9]*\\), which
> will match on 0 or more digits between 0..9. This is what (match-string
> 1) will return. (though as it is [0-9]* it could return 0 or more
> digits, so match-string 1 could be "", which may be an issue).
> 
> The string-to-int call will return that value as a number rather than as
> a string, which is then compared to max-toc (initially set to 0), not to
> a string. The final value has 1 added to it. So, you're not comparing
> strings, you are comparing strings of digits that have been converted to numbers.
> 
> So, this function would search through the buffer for the specified
> regexp, extract the group of digits as a string, convert them to a
> number and compare them to the last one found. If the number is larger,
> it would set that as the max and then continue the loop. Finally, it
> adds 1. 
> 
> I can see some things I would do differently and even if you don't find
> a match, your max-toc value will have a value of at least 1, but apart
> from that, it seems to do whatever it was intended to do. 
> 
> Tim
> 
> 



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-03-08 23:48 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <mailman.2.1299605488.4046.help-gnu-emacs@gnu.org>
2011-03-08 21:36 ` use of "match-string" Tim X
2011-03-08 23:48   ` ken
2011-03-08 17:31 ken
2011-03-08 19:06 ` PJ Weisberg
2011-03-08 19:32   ` ken

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).