From 59d3e64d46434cf8ad13d3329ef73e78bd0b56b6 Mon Sep 17 00:00:00 2001 From: Jim Porter Date: Sun, 17 Mar 2024 12:01:59 -0700 Subject: [PATCH 1/2] Allow toggling "readable" mode in EWW Additionally, add an option to prevent adding a new history entry for each call of 'eww-readable' (bug#68254). * lisp/net/eww.el (eww-retrieve): * lisp/net/eww.el (eww-readable-adds-to-history): New option. (eww-retrieve): Make sure we call CALLBACK in all configurations. (eww-render): Simplify how to pass encoding. (eww--parse-html-region, eww-display-document): New functions, extracted from... (eww-display-html): ... here. (eww-document-base): New function. (eww-readable): Toggle "readable" mode interactively, like with a minor mode. Consult 'eww-readable-adds-to-history'. (eww-reload): Use 'eshell-display-document'. * test/lisp/net/eww-tests.el (eww-test--with-mock-retrieve): Fix indent. (eww-test/display/html, eww-test/readable/toggle-display): New tests. * doc/misc/eww.texi (Basics): Describe the new behavior. * etc/NEWS: Announce this change. --- doc/misc/eww.texi | 5 ++ etc/NEWS | 12 ++++ lisp/net/eww.el | 127 ++++++++++++++++++++++++------------- test/lisp/net/eww-tests.el | 57 ++++++++++++++++- 4 files changed, 155 insertions(+), 46 deletions(-) diff --git a/doc/misc/eww.texi b/doc/misc/eww.texi index d31fcf1802b..522034c874d 100644 --- a/doc/misc/eww.texi +++ b/doc/misc/eww.texi @@ -146,6 +146,11 @@ Basics which part of the document contains the ``readable'' text, and will only display this part. This usually gets rid of menus and the like. + When called interactively, this command toggles the display of the +readable parts. With a positive prefix argument, this command always +displays the readable parts, and with a zero or negative prefix, it +always displays the full page. + @findex eww-toggle-fonts @vindex shr-use-fonts @kindex F diff --git a/etc/NEWS b/etc/NEWS index b02712dd21c..dd4c1ea2fac 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1054,6 +1054,18 @@ entries newer than the current page. To change the behavior when browsing from "historical" pages, you can customize 'eww-before-browse-history-function'. ++++ +*** 'eww-readable' now toggles display of the readable parts of a web page. +When called interactively, 'eww-readable' toggles whether to display +only the readable parts of a page or the full page. With a positive +prefix argument, it always displays the readable parts, and with a zero +or negative prefix, it always displays the full page. + +--- +*** New option 'eww-readable-adds-to-history'. +When non-nil (the default), calling 'eww-readable' adds a new entry to +the EWW page history. + ** go-ts-mode +++ diff --git a/lisp/net/eww.el b/lisp/net/eww.el index 54847bdf396..54b65d35164 100644 --- a/lisp/net/eww.el +++ b/lisp/net/eww.el @@ -275,6 +275,11 @@ eww-url-transformers :type '(repeat function) :version "29.1") +(defcustom eww-readable-adds-to-history t + "If non-nil, calling `eww-readable' adds a new entry to the history." + :type 'boolean + :version "30.1") + (defface eww-form-submit '((((type x w32 ns haiku pgtk android) (class color)) ; Like default mode line :box (:line-width 2 :style released-button) @@ -464,11 +469,11 @@ eww (defun eww-retrieve (url callback cbargs) (cond ((null eww-retrieve-command) - (url-retrieve url #'eww-render cbargs)) + (url-retrieve url callback cbargs)) ((eq eww-retrieve-command 'sync) (let ((data-buffer (url-retrieve-synchronously url))) (with-current-buffer data-buffer - (apply #'eww-render nil cbargs)))) + (apply callback nil cbargs)))) (t (let ((buffer (generate-new-buffer " *eww retrieve*")) (error-buffer (generate-new-buffer " *eww error*"))) @@ -673,9 +678,9 @@ eww-render (insert (format "Direct link to the document" url)) (goto-char (point-min)) - (eww-display-html charset url nil point buffer encode)) + (eww-display-html (or encode charset) url nil point buffer)) ((eww-html-p (car content-type)) - (eww-display-html charset url nil point buffer encode)) + (eww-display-html (or encode charset) url nil point buffer)) ((equal (car content-type) "application/pdf") (eww-display-pdf)) ((string-match-p "\\`image/" (car content-type)) @@ -726,34 +731,40 @@ eww-detect-charset (declare-function libxml-parse-html-region "xml.c" (start end &optional base-url discard-comments)) -(defun eww-display-html (charset url &optional document point buffer encode) +(defun eww--parse-html-region (start end &optional coding-system) + "Parse the HTML between START and END, returning the DOM as an S-expression. +Use CODING-SYSTEM to decode the region; if nil, decode as UTF-8. + +This replaces the region with the preprocessed HTML." + (setq coding-system (or coding-system 'utf-8)) + (with-restriction start end + (condition-case nil + (decode-coding-region (point-min) (point-max) coding-system) + (coding-system-error nil)) + ;; Remove CRLF and replace NUL with � before parsing. + (while (re-search-forward "\\(\r$\\)\\|\0" nil t) + (replace-match (if (match-beginning 1) "" "�") t t)) + (eww--preprocess-html (point-min) (point-max)) + (libxml-parse-html-region (point-min) (point-max)))) + +(defsubst eww-document-base (url dom) + `(base ((href . ,url)) ,dom)) + +(defun eww-display-document (document &optional point buffer) (unless (fboundp 'libxml-parse-html-region) (error "This function requires Emacs to be compiled with libxml2")) + (setq buffer (or buffer (current-buffer))) (unless (buffer-live-p buffer) (error "Buffer %s doesn't exist" buffer)) ;; There should be a better way to abort loading images ;; asynchronously. (setq url-queue nil) - (let ((document - (or document - (list - 'base (list (cons 'href url)) - (progn - (setq encode (or encode charset 'utf-8)) - (condition-case nil - (decode-coding-region (point) (point-max) encode) - (coding-system-error nil)) - (save-excursion - ;; Remove CRLF and replace NUL with � before parsing. - (while (re-search-forward "\\(\r$\\)\\|\0" nil t) - (replace-match (if (match-beginning 1) "" "�") t t))) - (eww--preprocess-html (point) (point-max)) - (libxml-parse-html-region (point) (point-max)))))) - (source (and (null document) - (buffer-substring (point) (point-max))))) + (let ((url (when (eq (car document) 'base) + (alist-get 'href (cadr document))))) + (unless url + (error "Document is missing base URL")) (with-current-buffer buffer (setq bidi-paragraph-direction nil) - (plist-put eww-data :source source) (plist-put eww-data :dom document) (let ((inhibit-read-only t) (inhibit-modification-hooks t) @@ -794,6 +805,16 @@ eww-display-html (forward-line 1))))) (eww-size-text-inputs)))) +(defun eww-display-html (charset url &optional document point buffer) + (let ((source (buffer-substring (point) (point-max)))) + (with-current-buffer buffer + (plist-put eww-data :source source))) + (eww-display-document + (or document + (eww-document-base + url (eww--parse-html-region (point) (point-max) charset))) + point buffer)) + (defun eww-handle-link (dom) (let* ((rel (dom-attr dom 'rel)) (href (dom-attr dom 'href)) @@ -1055,30 +1076,47 @@ eww-toggle-paragraph-direction "automatic" bidi-paragraph-direction))) -(defun eww-readable () - "View the main \"readable\" parts of the current web page. +(defun eww-readable (&optional arg) + "Toggle display of only the main \"readable\" parts of the current web page. This command uses heuristics to find the parts of the web page that -contains the main textual portion, leaving out navigation menus and -the like." - (interactive nil eww-mode) +contain the main textual portion, leaving out navigation menus and the +like. + +If called interactively, toggle the display of the readable parts. If +the prefix argument is positive, display the readable parts, and if it +is zero or negative, display the full page. + +If called from Lisp, toggle the display of the readable parts if ARG is +`toggle'. Display the readable parts if ARG is nil, omitted, or is a +positive number. Display the full page if ARG is a negative number. + +When `eww-readable-adds-to-history' is non-nil, calling this function +adds a new entry to `eww-history'." + (interactive (list (if current-prefix-arg + (prefix-numeric-value current-prefix-arg) + 'toggle)) + eww-mode) (let* ((old-data eww-data) - (dom (with-temp-buffer + (make-readable (cond + ((eq arg 'toggle) + (not (plist-get old-data :readable))) + ((and (numberp arg) (< arg 1)) + nil) + (t t))) + (dom (with-temp-buffer (insert (plist-get old-data :source)) - (condition-case nil - (decode-coding-region (point-min) (point-max) 'utf-8) - (coding-system-error nil)) - (eww--preprocess-html (point-min) (point-max)) - (libxml-parse-html-region (point-min) (point-max)))) + (eww--parse-html-region (point-min) (point-max)))) (base (plist-get eww-data :url))) - (eww-score-readability dom) - (eww-save-history) - (eww--before-browse) - (eww-display-html nil nil - (list 'base (list (cons 'href base)) - (eww-highest-readability dom)) - nil (current-buffer)) - (dolist (elem '(:source :url :title :next :previous :up :peer)) - (plist-put eww-data elem (plist-get old-data elem))) + (when make-readable + (eww-score-readability dom) + (setq dom (eww-highest-readability dom))) + (when eww-readable-adds-to-history + (eww-save-history) + (eww--before-browse) + (dolist (elem '(:source :url :title :next :previous :up :peer)) + (plist-put eww-data elem (plist-get old-data elem)))) + (eww-display-document (eww-document-base base dom)) + (plist-put eww-data :readable make-readable) (eww--after-page-change))) (defun eww-score-readability (node) @@ -1398,8 +1436,7 @@ eww-reload (if local (if (null (plist-get eww-data :dom)) (error "No current HTML data") - (eww-display-html 'utf-8 url (plist-get eww-data :dom) - (point) (current-buffer))) + (eww-display-document (plist-get eww-data :dom) (point))) (let ((parsed (url-generic-parse-url url))) (if (equal (url-type parsed) "file") ;; Use Tramp instead of url.el for files (since url.el diff --git a/test/lisp/net/eww-tests.el b/test/lisp/net/eww-tests.el index bd00893d503..a09e0a4f279 100644 --- a/test/lisp/net/eww-tests.el +++ b/test/lisp/net/eww-tests.el @@ -33,7 +33,7 @@ eww-test--with-mock-retrieve "Evaluate BODY with a mock implementation of `eww-retrieve'. This avoids network requests during our tests. Additionally, prepare a temporary EWW buffer for our tests." - (declare (indent 1)) + (declare (indent 0)) `(cl-letf (((symbol-function 'eww-retrieve) (lambda (url callback args) (with-temp-buffer @@ -48,6 +48,24 @@ eww-test--history-urls ;;; Tests: +(ert-deftest eww-test/display/html () + "Test displaying a simple HTML page." + (eww-test--with-mock-retrieve + (let ((eww-test--response-function + (lambda (url) + (concat "Content-Type: text/html\n\n" + (format "

Hello

%s" + url))))) + (eww "example.invalid") + ;; Check that the buffer contains the rendered HTML. + (should (equal (buffer-string) "Hello\n\n\nhttp://example.invalid/\n")) + (should (equal (get-text-property (point-min) 'face) + '(shr-text shr-h1))) + ;; Check that the DOM includes the `base'. + (should (equal (pcase (plist-get eww-data :dom) + (`(base ((href . ,url)) ,_) url)) + "http://example.invalid/"))))) + (ert-deftest eww-test/history/new-page () "Test that when visiting a new page, the previous one goes into the history." (eww-test--with-mock-retrieve @@ -176,5 +194,42 @@ eww-test/history/before-navigate/clone-previous "http://one.invalid/"))) (should (= eww-history-position 0))))) +(ert-deftest eww-test/readable/toggle-display () + "Test toggling the display of the \"readable\" parts of a web page." + (eww-test--with-mock-retrieve + (let* ((shr-width most-positive-fixnum) + (shr-use-fonts nil) + (words (string-join + (make-list + 20 "All work and no play makes Jack a dull boy.") + " ")) + (eww-test--response-function + (lambda (_url) + (concat "Content-Type: text/html\n\n" + "" + "This is an uninteresting sentence." + "
" + words + "
" + "")))) + (eww "example.invalid") + ;; Make sure EWW renders the whole document. + (should-not (plist-get eww-data :readable)) + (should (string-prefix-p + "This is an uninteresting sentence." + (buffer-substring-no-properties (point-min) (point-max)))) + (eww-readable 'toggle) + ;; Now, EWW should render just the "readable" parts. + (should (plist-get eww-data :readable)) + (should (string-match-p + (concat "\\`" (regexp-quote words) "\n*\\'") + (buffer-substring-no-properties (point-min) (point-max)))) + (eww-readable 'toggle) + ;; Finally, EWW should render the whole document again. + (should-not (plist-get eww-data :readable)) + (should (string-prefix-p + "This is an uninteresting sentence." + (buffer-substring-no-properties (point-min) (point-max))))))) + (provide 'eww-tests) ;; eww-tests.el ends here -- 2.25.1