From 20f837a1197bdf3cc11896f214b0f30327b6837c Mon Sep 17 00:00:00 2001 From: Andrew Kensler Date: Fri, 20 Jan 2023 18:17:41 -0800 Subject: [PATCH] Add new minor `balanced-fill-mode` to filling When enabled, filling will consider the entire paragraph at a time and try to place line breaks optimally to look more neat and even, according to a cost function. This is inspired by the Knuth-Plass algorithm. * lisp/textmodes/fill.el (balanced-fill-mode) (balanced-fill-maximum-words) (balanced-fill-margin) (balanced-fill-widows) (balanced-fill-length-exponent) (balanced-fill-raggedness-penalty) (balanced-fill-single-penalty) (balanced-fill-break-penalty): New variables. (balanced-fill-break-lines): New line breaking function. (fill-region-as-paragraph): Use it to fill paragraphs. * test/lisp/textmodes/fill-tests.el: Add smoke test for it. * doc/emacs/emacs.texi (Top): * doc/emacs/text.texi (Filling Text): Document it. * etc/NEWS: Announce it. --- doc/emacs/emacs.texi | 1 + doc/emacs/text.texi | 91 +++++++++++++ etc/NEWS | 6 + lisp/textmodes/fill.el | 214 ++++++++++++++++++++++++++++++ test/lisp/textmodes/fill-tests.el | 15 +++ 5 files changed, 327 insertions(+) diff --git a/doc/emacs/emacs.texi b/doc/emacs/emacs.texi index b6d149eb3e..babd487fbd 100644 --- a/doc/emacs/emacs.texi +++ b/doc/emacs/emacs.texi @@ -616,6 +616,7 @@ Top * Fill Prefix:: Filling paragraphs that are indented or in a comment, etc. * Adaptive Fill:: How Emacs can determine the fill prefix automatically. +* Balanced Fill:: Breaking lines to look more even and neat. Outline Mode diff --git a/doc/emacs/text.texi b/doc/emacs/text.texi index 6e16e743a5..251452df17 100644 --- a/doc/emacs/text.texi +++ b/doc/emacs/text.texi @@ -497,6 +497,7 @@ Filling * Fill Commands:: Commands to refill paragraphs and center lines. * Fill Prefix:: Filling paragraphs that are indented or in a comment, etc. * Adaptive Fill:: How Emacs can determine the fill prefix automatically. +* Balanced Fill:: Breaking lines to look more even and neat. @end menu @node Auto Fill @@ -828,6 +829,96 @@ Adaptive Fill line. If it returns @code{nil}, @code{adaptive-fill-regexp} gets a chance to find a prefix. +@node Balanced Fill +@subsection Balanced Filling + +@cindex balanced filling + Filling can consider an entire paragraph at a time when determining +where to place line breaks, as an alternative to just greedily +inserting a break after the last word that fits on each line. + + In this mode, it will try to optimally choose the set of line +breaks to minimize a cost function that penalizes untidy paragraphs. +This may place line breaks sooner than necessary if it improves +later lines. + + For example, if @code{fill-column} is 60 and balanced filling is +disabled then the greedy algorithm will fill the following paragraph +like so: + +@smallexample +"It's not too far-fetched to say that the best programs are +the ones written when the programmer is supposed to be +working on something else... Very good things happen when +management is enlightened enough to appreciate the +importance of allowing programmers some free time for +projects of this sort." --Melinda Varian +@end smallexample + +@noindent +while enabling the balanced filling will instead produce the following +with slightly shorter but more even line lengths: + +@smallexample +"It's not too far-fetched to say that the best programs +are the ones written when the programmer is supposed to +be working on something else... Very good things happen +when management is enlightened enough to appreciate the +importance of allowing programmers some free time for +projects of this sort." -- Melinda Varian +@end smallexample + +@findex balanced-fill-mode +@vindex balanced-fill-mode + Balanced Fill mode is disabled by default. To toggle it globally, +type @kbd{M-x balanced-fill-mode}. + +@vindex balanced-fill-maximum-words + For speed, the maximum limit on the number of words that the +balanced fill algorithm will attempt to process in a single paragraph +is controlled by @code{balanced-fill-maximum-words}. If the number +of words in the paragraph exceeds this limit, then filling will fall +back to the faster greedy algorithm. + + The cost function used to determine where to place the line breaks +can be tuned through several variables. + +@vindex balanced-fill-margin + Set @code{balanced-fill-margin} to the number of columns before +the fill-column that the balanced fill algorithm should attempt +to break lines at. Attempting to break lines slightly short +of the @code{fill-column} but being allowed to go up to the +@code{fill-column} can help to make the lines more even in length. + +@vindex balanced-fill-widows + Set @code{balanced-fill-widows} to the minimum number of words that +the algorithm should attempt to leave on the last line of a paragraph. +If this is set to an extremely high number, then the balanced filling +will generally try to make the last line as full as all the others. + +@vindex balanced-fill-length-exponent + The @code{balanced-fill-length-exponent} is the main control on the +cost function. It affects the penalty for lines that are shorter or +longer than the target length to the margin. The difference between +the two lengths will be raised to this power when calculating the +cost of a potential line break. + +@vindex balanced-fill-raggedness-penalty +@vindex balanced-fill-single-penalty +@vindex balanced-fill-break-penalty + There are also several minor additive penalties to help improve +the appearance. The @code{balanced-fill-raggedness-penalty} applies +for each column of difference in length for a line relative to the +previous line, unless this is the last line and longer than second +to last. Higher numbers make it try harder to keep all lines as +even as possible in length at the expense of other factors. The +@code{balanced-fill-single-penalty} is added either for starting a +new sentence with a single word right at the end of a line, or else +for ending a sentence with a single word left at the start of a line. +Finally, there is a @code{balanced-fill-break-penalty} for each line +break added. The larger this is, the more the algorithm will try to +minimize the number of lines despite the other penalties. + @node Case @section Case Conversion Commands @cindex case conversion diff --git a/etc/NEWS b/etc/NEWS index 10e91ec4ab..1379b2d90a 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -53,6 +53,12 @@ trash when deleting. Default is nil. * Editing Changes in Emacs 30.1 +** Filling can now try to break lines evenly. +The new user option 'balanced-fill-mode' can be set to non-nil to +make filling consider the entire paragraph at a time and try to +place line breaks optimally to look more neat and even, according +to a cost function. This is inspired by the Knuth-Plass algorithm. + --- ** New command 'kill-matching-buffers-no-ask'. This works like 'kill-matching-buffers', but without asking for diff --git a/lisp/textmodes/fill.el b/lisp/textmodes/fill.el index 2fde2ff6c4..c87b0afd3a 100644 --- a/lisp/textmodes/fill.el +++ b/lisp/textmodes/fill.el @@ -132,6 +132,86 @@ adaptive-fill-function :version "27.1" :type 'function) +(define-minor-mode balanced-fill-mode + "Toggle whether filling should try to neatly balance line lengths. + +When enabled, filling will consider the entire paragraph and +try to optimally choose a set of line breaks to minimize a +cost function that penalizes untidy paragraphs. This may +place line breaks sooner than necessary if it improves later +lines. When disabled, filling uses the traditional greedy line +breaking algorithm. + +See Info node `(emacs) Balanced Fill' for more details." + :global t + :version "30.1" + :type 'boolean + :group 'fill) + +(defcustom balanced-fill-maximum-words 500 + "Maximum limit on the number of words that the balanced fill +algorithm will attempt to process in a single paragraph. If +the paragraph exceeds this limit, filling will fall back to +the standard greedy algorithm." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-margin 3 + "Number of columns before the fill-column that the balanced fill +algorithm will attempt to break lines at. Attempting to break +lines slightly short of the fill-column but being allowed to +go up to the fill-column can help to make the lines more even +in length." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-widows 2 + "Minimum number of words that the algorithm will attempt to leave +on the last line of a paragraph. If this is set to an extremely +high number, then the balanced filling will generally try to make +the last line as full as all the others." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-length-exponent 3 + "Controls the penalty for lines that are shorter or longer than +the target length to the margin. The difference between the +actual and target length will be raised to this power when +calculating the cost of a potential line break. This is the +main control for the cost function." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-raggedness-penalty 40 + "Additional penalty for each column of difference in length +relative to the previous line, unless this is the last line +and longer than second to last. Higher numbers make it try +harder to keep all lines as even as possible in length at the +expense of other factors." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-single-penalty 150 + "Additional penalty either for starting a new sentence with a +single word right at the end of a line, or else for ending a +sentence with a single word left at the start of a line." + :version "30.1" + :type 'integer + :group 'fill) + +(defcustom balanced-fill-break-penalty 50 + "Additional penalty for each line break added. The larger this +is, the more the algorithm will try to minimize the number of +lines despite the other penalties." + :version "30.1" + :type 'integer + :group 'fill) + (defvar fill-indent-according-to-mode nil ;Screws up CC-mode's filling tricks. "Whether or not filling should try to use the major mode's indentation.") @@ -644,6 +724,135 @@ fill-indent-to-left-margin (indent-line-to (current-left-margin)) (put-text-property beg (point) 'face 'default))) +(defun balanced-fill-break-lines (from to justify) + ;; Build a table of visible word widths, with and without any preceding + ;; spaces, along with whether the word starts a new sentence. We go by + ;; columns and not chars to handle invisible text (especially invisible + ;; spaces), etc. + (let ((words '()) + (count 0) + (sentence-regexp (sentence-end))) + (goto-char to) + (while (> (point) from) + (let* ((previous (point)) + (end (current-column)) + (next (progn (fill-move-to-break-point from) + ;; Needed for moving past some Org-mode links + (skip-chars-backward " \t") + (point))) + (with-space (current-column)) + (without-space (progn (skip-chars-forward " \t") + (current-column))) + (new-sentence (looking-back sentence-regexp from))) + (goto-char next) + ;; If point didn't move, we're after the first word of the line. + (when (<= previous next) + (goto-char from) + (setq with-space (current-column) + without-space (current-column))) + ;; Merge into previous entry when moving past invisible text. + (if (= end without-space) + (setq end (+ end (or (car (pop words)) 0)))) + (push (list (- end with-space) (- end without-space) new-sentence) + words))) + (setq words (vconcat words)) + (setq count (length words)) + + ;; Abort (and fall back to greedy algorithm) if we have too many words. + ;; The algorithm below is worst-case quadratic (though usually not). + ;; Asymptotically faster algorithms exist, but are more complicated. + (if (> count balanced-fill-maximum-words) + nil + + ;; Consider each word as a candidate to start a line, and build a + ;; table of which word the previous line would be best to start on + ;; and the width of the previous line. Use dynamic programming to + ;; minimize a cost function. + (let ((starts (make-vector (1+ count) 0)) + (widths (make-vector (1+ count) 0))) + (let* ((start 0) + (room (- fill-column (current-column))) + (costs (make-vector (1+ count) most-positive-fixnum)) + (rags (make-vector (1+ count) balanced-fill-margin))) + (aset costs 0 0) + (while (< start count) + ;; The room for the first line may be different than the rest. + (if (= start 1) + (setq room (- fill-column + (string-width (or fill-prefix ""))))) + ;; Consider each word this new line might end on. Don't test + ;; width against room yet; we want at least one word per line, + ;; so we need at least one iteration. + (let ((end start) + (width 0)) + (while (< end count) + ;; Don't add the space before the first word to the width. + (setq width (+ width (nth (if (= end start) 1 0) + (aref words end)))) + ;; Our cost function is the sum of the best total cost + ;; for all the lines preceding the start of this one, + (let ((cost (+ (aref costs start) + ;; plus the distance from the margin + (expt (abs (- room balanced-fill-margin width)) + ;; exponentiated if not the last line + (if (or (/= (1+ end) count) + ;; or if we'd leave a widow, + (< (- (1+ end) start) + balanced-fill-widows)) + balanced-fill-length-exponent 1)) + ;; plus a penalty for an uneven right side + (* (abs (- room width + (aref rags start))) + ;; if not the last line + (if (or (/= (1+ end) count) + ;; unless the last line is longer, + (< (- room width) + (aref rags start))) + balanced-fill-raggedness-penalty 0)) + ;; plus a penalty if ending on the + ;; start of a new sentence, + (if (nth 2 (aref words end)) + balanced-fill-single-penalty 0) + ;; and a penalty if starting with + ;; a single word and then a new sentence, + (if (and (< (1+ start) count) + (nth 2 (aref words (1+ start)))) + balanced-fill-single-penalty 0) + ;; and a penalty for each break. + balanced-fill-break-penalty))) + ;; For ending after here, is this a better starting place? + (when (and (<= cost (aref costs (1+ end))) + (or (<= width room) (= end start))) + (aset costs (1+ end) cost) + (aset starts (1+ end) start) + (aset widths (1+ end) width) + (aset rags (1+ end) (- room width)))) + ;; Break the inner (end) loop if we're out of room now. + (if (>= width room) + (setq end count)) + (setq end (1+ end)))) + (setq start (1+ start)))) + + ;; Walk backwards from the end of the table to reconstruct a list + ;; of the optimal widths for each line. + (let (chosen) + (let ((end count)) + (while (> end 0) + (if (< end count) + (push (aref widths end) chosen)) + (setq end (aref starts end)))) + + ;; Then insert those line breaks and justify each line. + (mapc (lambda (width) + (move-to-column (+ (current-column) width)) + (fill-newline) + (if justify + (save-excursion + (forward-line -1) + (justify-current-line justify nil t)))) + chosen) + (if justify (justify-current-line justify t t))))))) + (defun fill-region-as-paragraph (from to &optional justify nosqueeze squeeze-after) "Fill the region as if it were a single paragraph. @@ -762,6 +971,11 @@ fill-region-as-paragraph ;; This is the actual filling loop. (goto-char from) + ;; Attempt to break into balanced lines if desired. + (when balanced-fill-mode + (balanced-fill-break-lines from to justify)) + ;; Otherwise (if point is still at from), + ;; fall back to the standard greedy line breaking loop. (let (linebeg) (while (< (point) to) (setq linebeg (point)) diff --git a/test/lisp/textmodes/fill-tests.el b/test/lisp/textmodes/fill-tests.el index ef822ba805..11db6a499f 100644 --- a/test/lisp/textmodes/fill-tests.el +++ b/test/lisp/textmodes/fill-tests.el @@ -121,6 +121,21 @@ test-fill-haskell ;; w "))) +(ert-deftest fill-test-balanced-fill-mode nil + "Basic test of the `balanced-fill-mode' option." + (with-temp-buffer + (insert "Ccc ccc a bb dddd bb bb a ccc a jjjjjjjjjj a eeeee a hhhhhhhh bb dddd.") + (setq fill-column 15) + (setq-local balanced-fill-mode nil) + (fill-paragraph) + (should (string= (buffer-string) "Ccc ccc a bb\ndddd bb bb a\nccc a\njjjjjjjjjj a\neeeee a\nhhhhhhhh bb\ndddd."))) + (with-temp-buffer + (insert "Ccc ccc a bb dddd bb bb a ccc a jjjjjjjjjj a eeeee a hhhhhhhh bb dddd.") + (setq fill-column 15) + (setq-local balanced-fill-mode t) + (fill-paragraph) + (should (string= (buffer-string) "Ccc ccc a\nbb dddd bb\nbb a ccc a\njjjjjjjjjj\na eeeee a\nhhhhhhhh\nbb dddd.")))) + (provide 'fill-tests) ;;; fill-tests.el ends here -- 2.17.1