From 9bd57c7a942cbe2ebcfdfb12600cabc9354d1ba6 Mon Sep 17 00:00:00 2001 From: Daniel Nicolai Date: Tue, 11 Jan 2022 20:37:36 +0100 Subject: [PATCH] Add support for EPUB, CBZ, FB2 and (O)XPS extension to doc view * doc/emacs/misc.texi (Document View): Add requirements for new extensions (i.e. mutool) * lisp/doc-view.el (doc-view): Additionally update preliminary comment (doc-view-unoconv-program): Put code all on one line (doc-view-doc-type): Update docstring. (doc-view-kill-proc): Fix comment indentation (doc-view-mode-p): Add check for new extensions and alternative check for PDF (doc-view-pdf/ps->png): Associate new extension with png converter (doc-view-convert-current-doc): Handle new extensions like PDF's (doc-view-set-doc-type): Set correct doc-type for new extensions. * lisp/files.el (auto-mode-alist): Associate new extension types with doc-view --- doc/emacs/misc.texi | 23 +++++--- lisp/doc-view.el | 126 ++++++++++++++++++++++++++------------------ lisp/files.el | 2 +- 3 files changed, 92 insertions(+), 59 deletions(-) diff --git a/doc/emacs/misc.texi b/doc/emacs/misc.texi index df1e5ef238..365c079e89 100644 --- a/doc/emacs/misc.texi +++ b/doc/emacs/misc.texi @@ -455,20 +455,27 @@ Document View @cindex PostScript file @cindex OpenDocument file @cindex Microsoft Office file +@cindex EPUB file +@cindex CBZ file +@cindex FB2 file +@cindex XPS file +@cindex OXPS file @cindex DocView mode @cindex mode, DocView @cindex document viewer (DocView) @findex doc-view-mode DocView mode is a major mode for viewing DVI, PostScript (PS), PDF, -OpenDocument, and Microsoft Office documents. It provides features -such as slicing, zooming, and searching inside documents. It works by -converting the document to a set of images using the @command{gs} -(GhostScript) or @command{mudraw}/@command{pdfdraw} (MuPDF) commands -and other external tools @footnote{For PostScript files, GhostScript -is a hard requirement. For DVI files, @code{dvipdf} or @code{dvipdfm} -is needed. 
For OpenDocument and Microsoft Office documents, the -@code{unoconv} tool is needed.}, and displaying those images. +OpenDocument, Microsoft Office, EPUB, CBZ, FB2, XPS and OXPS +documents. It provides features such as slicing, zooming, and +searching inside documents. It works by converting the document to a +set of images using the @command{gs} (GhostScript) or +@command{pdfdraw}/@command{mutool draw} (MuPDF) commands and other +external tools @footnote{PostScript files require GhostScript, DVI +files require @code{dvipdf} or @code{dvipdfm}, OpenDocument and +Microsoft Office documents require the @code{unoconv} tool, and EPUB, +CBZ, FB2, XPS and OXPS files require @code{mutool} to be available.}, +and displaying those images. @findex doc-view-toggle-display @findex doc-view-minor-mode diff --git a/lisp/doc-view.el b/lisp/doc-view.el index 5b462b24f5..d13480310c 100644 --- a/lisp/doc-view.el +++ b/lisp/doc-view.el @@ -3,7 +3,7 @@ ;; Copyright (C) 2007-2022 Free Software Foundation, Inc. ;; ;; Author: Tassilo Horn -;; Keywords: files, pdf, ps, dvi +;; Keywords: files, pdf, ps, dvi, djvu, epub, cbz, fb2, xps, openxps ;; This file is part of GNU Emacs. @@ -25,17 +25,19 @@ ;; Viewing PS/PDF/DVI files requires Ghostscript, `dvipdf' (comes with ;; Ghostscript) or `dvipdfm' (comes with teTeX or TeXLive) and ;; `pdftotext', which comes with xpdf (https://www.foolabs.com/xpdf/) -;; or poppler (https://poppler.freedesktop.org/). -;; Djvu documents require `ddjvu' (from DjVuLibre). -;; ODF files require `soffice' (from LibreOffice). +;; or poppler (https://poppler.freedesktop.org/). EPUB, CBZ, FB2, XPS +;; and OXPS documents require `mutool' which comes with mupdf +;; (https://mupdf.com/index.html). Djvu documents require `ddjvu' +;; (from DjVuLibre). ODF files require `soffice' (from LibreOffice). ;;; Commentary: ;; DocView is a document viewer for Emacs. 
It converts a number of -;; document formats (including PDF, PS, DVI, Djvu and ODF files) to a -;; set of PNG files, one PNG for each page, and displays the PNG -;; images inside an Emacs buffer. This buffer uses `doc-view-mode' -;; which provides convenient key bindings for browsing the document. +;; document formats (including PDF, PS, DVI, Djvu, ODF, EPUB, CBZ, +;; FB2, XPS and OXPS files) to a set of PNG (or TIFF for djvu) files, +;; one image for each page, and displays the images inside an Emacs +;; buffer. This buffer uses `doc-view-mode' which provides convenient +;; key bindings for browsing the document. ;; ;; To use it simply open a document file with ;; @@ -147,7 +149,10 @@ ;;;; Customization Options (defgroup doc-view nil - "In-buffer viewer for PDF, PostScript, DVI, and DJVU files." + "In-buffer document viewer. +The viewer handles PDF, PostScript, DVI, DJVU, ODF, EPUB, CBZ, +FB2, XPS and OXPS files, if the appropriate converter programs +are available (see Info node `(emacs)Document View')." :link '(function-link doc-view) :version "22.2" :group 'applications @@ -256,9 +261,7 @@ doc-view-dvipdf-program `doc-view-dvipdf-program' will be preferred." :type 'file) -(define-obsolete-variable-alias 'doc-view-unoconv-program - 'doc-view-odf->pdf-converter-program - "24.4") +(define-obsolete-variable-alias 'doc-view-unoconv-program 'doc-view-odf->pdf-converter-program "24.4") (defcustom doc-view-odf->pdf-converter-program (cond @@ -382,7 +385,8 @@ doc-view--buffer-file-name (defvar doc-view-doc-type nil "The type of document in the current buffer. -Can be `dvi', `pdf', `ps', `djvu' or `odf'.") +Can be `dvi', `pdf', `ps', `djvu', `odf', `epub', `cbz', `fb2', +`xps' or `oxps'.") (defvar doc-view-single-page-converter-function nil "Function to call to convert a single page of the document to a bitmap file. @@ -464,17 +468,17 @@ doc-view--revert-buffer ;; It's normal for this operation to result in a very large undo entry. 
(setq-local undo-outer-limit (* 2 (buffer-size)))) (cl-labels ((revert () - (let ((revert-buffer-preserve-modes t)) - (apply orig-fun args) - ;; Update the cached version of the pdf file, - ;; too. This is the one that's used when - ;; rendering (bug#26996). - (unless (equal buffer-file-name - doc-view--buffer-file-name) - ;; FIXME: Lars says he needed to recreate - ;; the dir, we should figure out why. - (doc-view-make-safe-dir doc-view-cache-directory) - (write-region nil nil doc-view--buffer-file-name))))) + (let ((revert-buffer-preserve-modes t)) + (apply orig-fun args) + ;; Update the cached version of the pdf file, + ;; too. This is the one that's used when + ;; rendering (bug#26996). + (unless (equal buffer-file-name + doc-view--buffer-file-name) + ;; FIXME: Lars says he needed to recreate + ;; the dir, we should figure out why. + (doc-view-make-safe-dir doc-view-cache-directory) + (write-region nil nil doc-view--buffer-file-name))))) (if (and (eq 'pdf doc-view-doc-type) (executable-find "pdfinfo")) ;; We don't want to revert if the PDF file is corrupted which @@ -738,7 +742,7 @@ doc-view-kill-proc (interactive) (while (consp doc-view--current-converter-processes) (ignore-errors ;; Some entries might not be processes, and maybe - ;; some are dead already? + ;; some are dead already? (kill-process (pop doc-view--current-converter-processes)))) (when doc-view--current-timer (cancel-timer doc-view--current-timer) @@ -799,8 +803,8 @@ doc-view--current-cache-dir ;;;###autoload (defun doc-view-mode-p (type) "Return non-nil if document type TYPE is available for `doc-view'. -Document types are symbols like `dvi', `ps', `pdf', or `odf' (any -OpenDocument format)." +Document types are symbols like `dvi', `ps', `pdf', `epub', +`cbz', `fb2', `xps', `oxps', or `odf' (any OpenDocument format)." 
(and (display-graphic-p) (image-type-available-p 'png) (cond @@ -811,16 +815,22 @@ doc-view-mode-p (and doc-view-dvipdfm-program (executable-find doc-view-dvipdfm-program))))) ((memq type '(postscript ps eps pdf)) - (or (and doc-view-ghostscript-program + (or (and doc-view-ghostscript-program (executable-find doc-view-ghostscript-program)) - (and doc-view-pdfdraw-program - (executable-find doc-view-pdfdraw-program)))) + ;; for pdf also check for `doc-view-pdfdraw-program' + (when (eq type 'pdf) + (and doc-view-pdfdraw-program + (executable-find doc-view-pdfdraw-program))))) ((eq type 'odf) (and doc-view-odf->pdf-converter-program (executable-find doc-view-odf->pdf-converter-program) (doc-view-mode-p 'pdf))) ((eq type 'djvu) (executable-find "ddjvu")) + ((memq type '(epub cbz fb2 xps oxps)) + ;; first check if `doc-view-pdfdraw-program' is set to mutool + (and (string= doc-view-pdfdraw-program "mutool") + (executable-find "mutool"))) (t ;; unknown image type nil)))) @@ -1053,7 +1063,7 @@ doc-view-start-process ;; some file-name-handler-managed dir, for example). (let* ((default-directory (or (unhandled-file-name-directory default-directory) - (expand-file-name "~/"))) + (expand-file-name "~/"))) (proc (apply #'start-process name doc-view-conversion-buffer program args))) (push proc doc-view--current-converter-processes) @@ -1189,7 +1199,7 @@ doc-view-pdf/ps->png "Convert PDF-PS to PNG asynchronously." (funcall (pcase doc-view-doc-type - ('pdf doc-view-pdf->png-converter-function) + ((or 'pdf 'epub 'cbz 'fb2 'xps 'oxps) doc-view-pdf->png-converter-function) ('djvu #'doc-view-djvu->tiff-converter-ddjvu) (_ #'doc-view-ps->png-converter-ghostscript)) pdf-ps png nil @@ -1227,20 +1237,20 @@ doc-view-document->bitmap (let ((rest (cdr pages))) (funcall doc-view-single-page-converter-function pdf (format png (car pages)) (car pages) - (lambda () - (if rest - (doc-view-document->bitmap pdf png rest) - ;; Yippie, the important pages are done, update the display. 
- (clear-image-cache) - ;; For the windows that have a message (like "Welcome to - ;; DocView") display property, clearing the image cache is - ;; not sufficient. - (dolist (win (get-buffer-window-list (current-buffer) nil 'visible)) - (with-selected-window win - (when (stringp (overlay-get (doc-view-current-overlay) 'display)) - (doc-view-goto-page (doc-view-current-page))))) - ;; Convert the rest of the pages. - (doc-view-pdf/ps->png pdf png))))))) + (lambda () + (if rest + (doc-view-document->bitmap pdf png rest) + ;; Yippie, the important pages are done, update the display. + (clear-image-cache) + ;; For the windows that have a message (like "Welcome to + ;; DocView") display property, clearing the image cache is + ;; not sufficient. + (dolist (win (get-buffer-window-list (current-buffer) nil 'visible)) + (with-selected-window win + (when (stringp (overlay-get (doc-view-current-overlay) 'display)) + (doc-view-goto-page (doc-view-current-page))))) + ;; Convert the rest of the pages. + (doc-view-pdf/ps->png pdf png))))))) (defun doc-view-pdf->txt (pdf txt callback) "Convert PDF to TXT asynchronously and call CALLBACK when finished." @@ -1337,7 +1347,9 @@ doc-view-convert-current-doc ;; Rename to doc.pdf (rename-file opdf pdf) (doc-view-pdf/ps->png pdf png-file))))) - ((or 'pdf 'djvu) + ;; The doc-view-mode-p check ensures that epub, cbz, fb2 and + ;; (o)xps are handled with mutool + ((or 'pdf 'djvu 'epub 'cbz 'fb2 'xps 'oxps) (let ((pages (doc-view-active-pages))) ;; Convert doc to bitmap images starting with the active pages. (doc-view-document->bitmap doc-view--buffer-file-name png-file pages))) @@ -1432,7 +1444,7 @@ doc-view-paper-sizes (defun doc-view-guess-paper-size (iw ih) "Guess the paper size according to the aspect ratio." 
(cl-labels ((div (x y) - (round (/ (* 100.0 x) y)))) + (round (/ (* 100.0 x) y)))) (let ((ar (div iw ih)) (al (mapcar (lambda (l) (list (div (nth 1 l) (nth 2 l)) (car l))) @@ -1869,6 +1881,8 @@ doc-view-set-doc-type ("dvi" dvi) ;; PDF ("pdf" pdf) ("epdf" pdf) + ;; EPUB + ("epub" epub) ;; PostScript ("ps" ps) ("eps" ps) ;; DjVu @@ -1880,7 +1894,13 @@ doc-view-set-doc-type ;; Microsoft Office formats (also handled by the odf ;; conversion chain). ("doc" odf) ("docx" odf) ("xls" odf) ("xlsx" odf) - ("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf)) + ("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf) + ;; CBZ + ("cbz" cbz) + ;; FB2 + ("fb2" fb2) + ;; (Open)XPS + ("xps" xps) ("oxps" oxps)) t)))) (content-types (save-excursion @@ -1889,7 +1909,13 @@ doc-view-set-doc-type ((looking-at "%!") '(ps)) ((looking-at "%PDF") '(pdf)) ((looking-at "\367\002") '(dvi)) - ((looking-at "AT&TFORM") '(djvu)))))) + ((looking-at "AT&TFORM") '(djvu)) + ;; The following pattern actually recognizes any zip archive, + ;; so the same association is also used for cbz files. This is + ;; fine, as cbz files should be handled like epub anyway. + ;; NOTE(review): confirm this agrees with the name-based type. + ((looking-at "PK") '(epub)) + )))) (setq-local doc-view-doc-type (car (or (nreverse (seq-intersection name-types content-types #'eq)) diff --git a/lisp/files.el b/lisp/files.el index a11786fca2..f2c656bfde 100644 --- a/lisp/files.el +++ b/lisp/files.el @@ -2925,7 +2925,7 @@ auto-mode-alist ("\\.\\(diffs?\\|patch\\|rej\\)\\'" . diff-mode) ("\\.\\(dif\\|pat\\)\\'" . diff-mode) ; for MS-DOS ("\\.[eE]?[pP][sS]\\'" . ps-mode) - ("\\.\\(?:PDF\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe) + ("\\.\\(?:PDF\\|EPUB\\|CBZ\\|FB2\\|O?XPS\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|epub\\|cbz\\|fb2\\|o?xps\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe) ("configure\\.\\(ac\\|in\\)\\'" . autoconf-mode) ("\\.s\\(v\\|iv\\|ieve\\)\\'" . 
sieve-mode) ("BROWSE\\'" . ebrowse-tree-mode) -- 2.33.1