From: Stefan Monnier <monnier@IRO.UMontreal.CA>
To: Eli Zaretskii <eliz@gnu.org>
Cc: teddy@recompile.se, 20704@debbugs.gnu.org
Subject: bug#20704: info.el bug fix; Interprets Info format wrongly
Date: Wed, 10 Jun 2015 13:50:29 -0400 [thread overview]
Message-ID: <jwvvbevpi7k.fsf-monnier+emacsbugs@gnu.org> (raw)
In-Reply-To: <83382btqcc.fsf@gnu.org> (Eli Zaretskii's message of "Mon, 01 Jun 2015 18:12:35 +0300")
> Using byte-to-position would make things worse for Latin-1 and the likes.
There's also the problem of EOL encoding, but I'll just ignore it for now.
Could someone test the patch below?
Stefan
diff --git a/lisp/info.el b/lisp/info.el
index 9602337..0de7f1e 100644
--- a/lisp/info.el
+++ b/lisp/info.el
@@ -1020,7 +1020,7 @@ which the match was found."
(beginning-of-line)
(when (re-search-forward regexp nil t)
(list (string-equal "Ref:" (match-string 1))
- (+ (point-min) (read (current-buffer)))
+ (filepos-to-bufferpos (read (current-buffer)) 'approximate)
major-mode)))))
(defun Info-find-in-tag-table (marker regexp &optional strict-case)
@@ -1187,7 +1187,8 @@ is non-nil)."
(when found
;; FOUND is (ANCHOR POS MODE).
- (setq guesspos (nth 1 found))
+ (setq guesspos (filepos-to-bufferpos (nth 1 found)
+ 'approximate))
;; If this is an indirect file, determine which
;; file really holds this node and read it in.
@@ -1203,8 +1204,7 @@ is non-nil)."
(throw 'foo t)))))
;; Else we may have a node, which we search for:
- (goto-char (max (point-min)
- (- (byte-to-position guesspos) 1000)))
+ (goto-char (max (point-min) (- guesspos 1000)))
;; Now search from our advised position (or from beg of
;; buffer) to find the actual node. First, check
@@ -1523,7 +1523,9 @@ is non-nil)."
thisfilepos thisfilename)
(search-forward ": ")
(setq thisfilename (buffer-substring beg (- (point) 2)))
- (setq thisfilepos (+ (point-min) (read (current-buffer))))
+ (setq thisfilepos
+ (filepos-to-bufferpos (read (current-buffer))
+ 'approximate))
;; read in version 19 stops at the end of number.
;; Advance to the next line.
(forward-line 1)
@@ -1554,7 +1556,7 @@ is non-nil)."
;; Don't add the length of the skipped summary segment to
;; the value returned to `Info-find-node-2'. (Bug#14125)
(if (numberp nodepos)
- (+ (- nodepos lastfilepos) (point-min)))))
+ (- nodepos lastfilepos))))
(defun Info-unescape-quotes (value)
"Unescape double quotes and backslashes in VALUE."
@@ -2013,8 +2015,9 @@ If DIRECTION is `backward', search in the reverse direction."
(re-search-backward "\\(^.*\\): [0-9]+$")
(re-search-forward "\\(^.*\\): [0-9]+$"))
(goto-char (+ (match-end 1) 2))
- (setq list (cons (cons (+ (point-min)
- (read (current-buffer)))
+ (setq list (cons (cons (filepos-to-bufferpos
+ (read (current-buffer))
+ 'approximate)
(match-string-no-properties 1))
list))
(goto-char (if backward
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index eae787b..1f7df0b 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -313,6 +313,35 @@ per-character basis, this may not be accurate."
(throw 'tag3 charset)))
charset-list)
nil)))))))))
+
+;;;###autoload
+(defun filepos-to-bufferpos (byte &optional quality coding-system)
+  "Try to return the buffer position corresponding to a particular file position.
+The file position is given as BYTE, a 0-based byte offset into the file.
+The function presumes the file is encoded with CODING-SYSTEM, which defaults
+to `buffer-file-coding-system'.
+QUALITY can be:
+  `approximate', in which case we may cut some corners to avoid
+    excessive work.
+  nil, in which case we may return nil rather than an approximation."
+  ;; `exact' (may re-(en|de)code a large part of the file) is not implemented.
+  (unless coding-system (setq coding-system buffer-file-coding-system))
+  (let ((eol (coding-system-eol-type coding-system))
+        (type (coding-system-type coding-system))
+        (pm (save-restriction (widen) (point-min))))
+    (pcase (cons type eol)
+      (`(utf-8 . ,(or 0 2))
+       (let ((bom-offset (coding-system-get coding-system :bom)))
+         (byte-to-position
+          (+ pm (max 0 (- byte (if bom-offset 3 0)))))))
+      ;; FIXME: What if it's a 2-byte charset?  Are there such beasts?
+      (`(charset . ,(or 0 2)) (+ pm byte))
+      (_
+       (pcase quality
+         ;; BYTE is a 0-based offset: add PM before converting, as above.
+         (`approximate (byte-to-position (+ pm byte)))
+         ;; (`exact ...)
+         )))))
\f
(provide 'mule-util)
next prev parent reply other threads:[~2015-06-10 17:50 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-31 14:54 bug#20704: info.el bug fix; Interprets Info format wrongly Teddy Hogeborn
2015-06-01 14:01 ` Stefan Monnier
2015-06-01 15:12 ` Eli Zaretskii
2015-06-09 11:09 ` Teddy Hogeborn
2015-06-09 14:29 ` Eli Zaretskii
2015-06-09 16:01 ` Stefan Monnier
2015-06-10 17:50 ` Stefan Monnier [this message]
2015-06-10 18:21 ` Eli Zaretskii
2015-06-11 3:02 ` Stefan Monnier
2015-06-11 13:11 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=jwvvbevpi7k.fsf-monnier+emacsbugs@gnu.org \
--to=monnier@iro.umontreal.ca \
--cc=20704@debbugs.gnu.org \
--cc=eliz@gnu.org \
--cc=teddy@recompile.se \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).