unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Visuwesh <visuweshm@gmail.com>
To: Tassilo Horn <tsdh@gnu.org>
Cc: Eli Zaretskii <eliz@gnu.org>, 73638@debbugs.gnu.org
Subject: bug#73638: 31.0.50; doc-view: imenu index cannot be made for LaTeX PDFs
Date: Sun, 06 Oct 2024 18:02:48 +0530	[thread overview]
Message-ID: <8734l9h0nj.fsf@gmail.com> (raw)
In-Reply-To: <87plod8ob3.fsf@gnu.org> (Tassilo Horn's message of "Sun, 06 Oct 2024 13:26:40 +0200")

[-- Attachment #1: Type: text/plain, Size: 683 bytes --]

[ஞாயிறு அக்டோபர் 06, 2024] Tassilo Horn wrote:

> Visuwesh <visuweshm@gmail.com> writes:
>
>> As you expected, your minified version works fine when doing
>> process-send-string.  I do not have much experience working with async
>> processes like this before, what do you think about the approach
>> below?
>
> I don't do that very frequently.  I think it would be simpler if we skip
> the sentinel and instead use some :buffer " *mutool-run-result*" with
> make-process and just read from there after the accept-process-output
> loop.

That was a serious brainfart, indeed.  I've gone with your approach in
the attached, please review.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Make-imenu-index-generation-for-PDFs-more-reliable.patch --]
[-- Type: text/x-diff, Size: 4395 bytes --]

From 6a9de26ac3efbdd9931c74db90e2aeac2bc0dca8 Mon Sep 17 00:00:00 2001
From: Visuwesh <visuweshm@gmail.com>
Date: Sun, 6 Oct 2024 18:02:06 +0530
Subject: [PATCH] Make imenu index generation for PDFs more reliable

Do away with parsing the output of "mutool show FILE outline"
since the URI reported in its output may not include the page
number of the heading, and instead may contain "nameddest"
elements which cannot be resolved using "mutool".  Instead, use
a MuPDF JS script to generate the PDF outline, which makes it
possible to resolve such URIs.

* lisp/doc-view.el (doc-view--outline-rx): Remove as no longer
needed.
(doc-view--outline): Reflect that outline can be generated for
non-PDF files too.
(doc-view--mutool-pdf-outline-script): Add new variable to hold
the JS script used to generate the outline.
(doc-view--pdf-outline): Use the script.  (bug#73638)
---
 lisp/doc-view.el | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/lisp/doc-view.el b/lisp/doc-view.el
index 446beeafd9f..a5e84f1e2ab 100644
--- a/lisp/doc-view.el
+++ b/lisp/doc-view.el
@@ -1969,14 +1969,26 @@ doc-view-search-previous-match
 	(doc-view-goto-page (caar (last doc-view--current-search-matches)))))))
 
 ;;;; Imenu support
-(defconst doc-view--outline-rx
-  "[^\t]+\\(\t+\\)\"\\(.+\\)\"\t#\\(?:page=\\)?\\([0-9]+\\)")
-
 (defvar-local doc-view--outline nil
-  "Cached PDF outline, so that it is only computed once per document.
+  "Cached document outline, so that it is only computed once per document.
 It can be the symbol `unavailable' to indicate that outline is
 unavailable for the document.")
 
+(defvar doc-view--mutool-pdf-outline-script
+  "var document = new Document.openDocument(\"%s\", \"application/pdf\");
+var outline = document.loadOutline();
+if(!outline) quit();
+function pp(outl, level){print(\"((level . \" + level + \")\");\
+print(\"(title . \" + repr(outl.title) + \")\");\
+print(\"(page . \" + document.resolveLink(outl.uri) + \"))\");\
+if(outl.down){for(var i=0; i<outl.down.length; i++){pp(outl.down[i], level+1);}}};
+function run(){print(\"BEGIN(\");\
+for(var i=0; i<outline.length; i++){pp(outline[i], 1);}print(\")\");};
+run()"
+  "JS script to extract the PDF's outline using mutool.
+The script has to be minified to pass it to the REPL.  The \"BEGIN\"
+marker is here to skip past the prompt characters.")
+
 (defun doc-view--pdf-outline (&optional file-name)
   "Return a list describing the outline of FILE-NAME.
 Return a list describing the current file if FILE-NAME is nil.
@@ -1986,21 +1998,25 @@ doc-view--pdf-outline
 structure is extracted by `doc-view--imenu-subtree'."
   (let ((fn (or file-name (buffer-file-name))))
     (when fn
-      (let ((outline nil)
-            (fn (expand-file-name fn)))
-        (with-temp-buffer
-          (unless (eql 0 (call-process doc-view-pdfdraw-program nil
-                                       (current-buffer) nil "show" fn "outline"))
+      (with-temp-buffer
+        (let ((proc (make-process
+                     :name "doc-view-pdf-outline"
+                     :command (list "mutool" "run")
+                     :buffer (current-buffer))))
+          (process-send-string proc (format doc-view--mutool-pdf-outline-script
+                                            (expand-file-name fn)))
+          ;; Need to send this twice for some reason...
+          (process-send-eof)
+          (process-send-eof)
+          (while (accept-process-output proc))
+          (unless (eq (process-status proc) 'exit)
             (setq doc-view--outline 'unavailable)
             (imenu-unavailable-error "Unable to create imenu index using `mutool'"))
           (goto-char (point-min))
-          (while (re-search-forward doc-view--outline-rx nil t)
-            (push `((level . ,(length (match-string 1)))
-                    (title . ,(replace-regexp-in-string "\\\\[rt]" " "
-                                                        (match-string 2)))
-                    (page . ,(string-to-number (match-string 3))))
-                  outline)))
-        (nreverse outline)))))
+          (search-forward "BEGIN")
+          (condition-case nil
+              (read (current-buffer))
+            (end-of-file nil)))))))
 
 (defun doc-view--djvu-outline (&optional file-name)
   "Return a list describing the outline of FILE-NAME.
-- 
2.45.2


  reply	other threads:[~2024-10-06 12:32 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-05 11:06 bug#73638: 31.0.50; doc-view: imenu index cannot be made for LaTeX PDFs Visuwesh
2024-10-05 19:56 ` Tassilo Horn
2024-10-06  5:42   ` Eli Zaretskii
2024-10-06  6:28     ` Visuwesh
2024-10-06  6:39       ` Eli Zaretskii
2024-10-06  8:16       ` Tassilo Horn
2024-10-06 10:32         ` Visuwesh
2024-10-06 11:26           ` Tassilo Horn
2024-10-06 12:32             ` Visuwesh [this message]
2024-10-07  7:02               ` Tassilo Horn
2024-10-07  9:26                 ` Visuwesh
2024-10-07  9:55                   ` Visuwesh
2024-10-07 11:03                     ` Tassilo Horn
2024-10-07 12:53                       ` Visuwesh
2024-10-07 15:04                         ` Tassilo Horn
2024-10-08  9:44                           ` Visuwesh
2024-10-08 15:43                             ` Tassilo Horn
2024-10-06  6:39     ` Visuwesh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8734l9h0nj.fsf@gmail.com \
    --to=visuweshm@gmail.com \
    --cc=73638@debbugs.gnu.org \
    --cc=eliz@gnu.org \
    --cc=tsdh@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).