all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: hector <hectorlahoz@gmail.com>
To: emacs-devel@gnu.org
Subject: GSoC project "Hyphenation"?
Date: Fri, 23 Dec 2016 02:09:36 +0100	[thread overview]
Message-ID: <20161223010936.GA2877@workstation> (raw)
In-Reply-To: <m3d37y9go5.fsf@passepartout.tim-landscheidt.de>

[-- Attachment #1: Type: text/plain, Size: 1220 bytes --]

Hi.

I did the same and I came upon this post.

I wrote a little program in ELISP to do it.
Currently it works but I have to fix some things: patterns should not match
at the end of the word.

Since my purpose was not to hyphenate mails or output of console
I didn't wrote anything to integrate it with the available filling
or searching functions.

It just takes a word and returns a list of word "slices".

But now I'm thinking that this is some general task. Not specific to
Emacs nor TeX. Shouldn't it be a system library?

To try it:
M-: (load-patterns "FILENAME.DIC")
M-x ly:hyphenate-region

On Tue, Mar 27, 2012 at 04:01:30PM +0000, Tim Landscheidt wrote:
> Hi,
> 
> time and time again I have searched for "Emacs" and "hyphen-
> ation", and so little results came up that I looked up "hy-
> phenation" again to make sure that I hadn't misspelled it.
> It seems that it is not a feature often asked for as the
> typical workflow of text processing in Emacs usually in-
> volves TeX or something similar, but I do find myself often
> in need to hyphenate texts like mails or output of console
> programs.  With Google Summer of Code around, I'd like to
> propose the following idea "Hyphenation in GNU Emacs":
> 

[-- Attachment #2: hyphenate.el --]
[-- Type: text/plain, Size: 6249 bytes --]

;; hyphenate.el - build and manage pattern trie
;; Copyright Héctor Lahoz 2016
;;
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;; this program is based on the work of Franklin M. Liang
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(eval-when-compile (require 'cl))

; (optimize (safety 0)) ;; uncomment for production

(defstruct ptrie:node
  children ;; CAR - no match; CDR - match (next position)
  (char nil :read-only t)
  (final nil))

(defvar pattern-trie (make-ptrie:node :char ?\s :children '(nil . nil))
  "Root of the patterns trie")

(defun ptrie:print-trie (n path)
  "Print the tree recursively"
  (let ((path_ (concat path (make-string 1 (ptrie:node-char n)))))
    (if (null (cdr (ptrie:node-children n)))
	(progn
	  (princ path_)
	  (princ " - ")
	  (princ (ptrie:node-final n))
	  (princ "\n"))
      (ptrie:print-trie (cdr (ptrie:node-children n)) path_))
    (when (car (ptrie:node-children n))
      (ptrie:print-trie (car (ptrie:node-children n)) path))))

(defun ptrie:print-node (n)
  "Print node N for debugging"
  (let ((ret1 "Node:  :")
	(ret2 " - "))
 ;; I don't understand why this is necessary			
 ;; it seems the string referenced by ret2 is kept between calls and it is not initialised
    (aset ret2 0 ?\s)
    (aset ret2 2 ?\s)
    (aset ret1 6 (ptrie:node-char n))
    (if (null (ptrie:node-children n))
	(setq ret2 "no children")
      (when (car (ptrie:node-children n))
	(aset ret2 0 (ptrie:node-char (car (ptrie:node-children n)))))
      (when (cdr (ptrie:node-children n))
	(aset ret2 2 (ptrie:node-char (cdr (ptrie:node-children n))))))
    (concat ret1 ret2)))

(defun ptrie:find-next-char (node char &optional create)
  "Returns the node corresponding to CHAR. Add a new node when CREATE is t
and requested node doesn't exist"
  (let ((prev node)
	n
	new
	(set-prev-link 'setcdr))
    (setq n (cdr (ptrie:node-children prev)))
      
    (while (and n  ;; works too when (null node-children)
		(> char (ptrie:node-char n)))
      (setq prev n)
      (setq set-prev-link 'setcar)
      (setq n (car (ptrie:node-children n))))
    (when (or (null n)
	      (/= char (ptrie:node-char n)))
      (if (null create)
	  (setq n nil)
	(setq new (make-ptrie:node :char char
			     :children (cons n nil)))
	(when (null (ptrie:node-children prev))
	  (setf (ptrie:node-children prev) '(nil . nil)))
	(funcall set-prev-link (ptrie:node-children prev) new)
	(setq n new)))
    n))

(defun find-pattern (trie p)
  "Return pattern indicated by P starting at TRIE or nil if not found"
  (let ((n trie))
    (dotimes (i (length p) (ptrie:node-final n))
      (when (null (setq n (ptrie:find-next-char n (aref p i))))
	(return nil)))))

(defun add-pattern (trie p)
  "Add pattern P to trie TRIE"
  (let ((pnw (pat-nw p))
	(n trie)
	char)
    
    (dotimes (i (length pnw))
      (setq char (aref pnw i))
      (setq n (ptrie:find-next-char n char t)))
    (setf (ptrie:node-final n) p)))

(defun pat-nw (str)
  "Reomve weight digits from STR"
  (let ((ret nil)
	(char nil)
	(char-str nil)
	(l (length str)))
    (do ((i (- l 1) (1- i))) ((< i 0))
	(setq char (aref str i))
	(setq char-str (substring-no-properties str i (1+ i)))
	(if (not (string-match "[[:digit:]]" char-str))
	    (push char ret)))
    (concat ret)))

(defun read-pattern (buf)
  (let* ((pat))
    (setq pat (buffer-substring (point)
				(progn (beginning-of-line 2)
				       (- (point) 1))))
    (if (or (equal pat "")
	    (equal pat "\n"))
	nil
      pat)))

(defun load-patterns (file)
  (let ((hyphen-patterns (find-file-read-only file))
	(pat nil)
	(pat-nw nil)
	(n pattern-trie)
	(tmp)
	(i))
    (while (setq pat (read-pattern hyphen-patterns))
      (add-pattern pattern-trie pat))))

(defmacro digitp (c)
  "True if c is a digit"
  (if (and (< 47 (eval c))
	   (> 58 (eval c)))
      't
    'nil))

;; TODO optimise
(defun ly:hyphenate-word (word)
  "Returns WORD with hyphens added"
  (let* (s-word
	pat
	weight
	ret
	p-found
	(hpos 0)
	;; add markers at beginning and end
	(delim-word (concat "." word "."))
	(hyphen-weights (make-vector (length delim-word) 0)))
    (dotimes (anchor (length delim-word))
      (setq s-word (substring delim-word anchor))
      (do ((end 1 (1+ end))) ((> end (length s-word)))
	(when (setq pat (find-pattern pattern-trie (substring s-word 0 end)))
	  ;; store weights
	  (setq hpos 0)
	  (dotimes (pos (length pat))
	    (if (not (digitp (aref pat pos)))
		(setq hpos (1+ hpos))
	      (setq weight (- (aref pat pos) ?0))
	      (when (> weight (aref hyphen-weights (+ anchor hpos)))
		(aset hyphen-weights (+ anchor hpos) weight)))))))

    (dotimes (i (length word))
      ;; avoid hyphens before word (when i == 1)
      ;; e.g. pattern "1de" matches the word "de" so it produces " -- de"
      ;; perhaps we should modify the preceding algorithm, not to include
      ;; them in the first place
      (when (and (/= i 1)
		 (= (% (aref hyphen-weights (1+ i)) 2) 1))
	(push " -- " ret))
      (push (aref word i) ret))
    (mapconcat (lambda (s)
		 (if (stringp s)
		     s
		   (string s)))
	       (nreverse ret)
	       "")))

(defun ly:hyphenate-region (beg end)
  "Add lilypond centered hyphens to every word in the region"
  (interactive "r")
  (save-excursion
    (goto-char beg)
    (search-forward "{" (line-beginning-position 2) t)
    (let ((end (copy-marker end))
	  word-beg)
      (while (< (point) end)
	(skip-chars-forward "^a-zA-Záéíóúñäëöüß") ;; find next word
	(setq word-beg (point))
	(forward-word)
	(insert	(ly:hyphenate-word 
		 (prog1
		     (buffer-substring-no-properties word-beg (point))
		   (delete-region word-beg (point)))))))))

      parent reply	other threads:[~2016-12-23  1:09 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-27 16:01 GSoC project "Hyphenation"? Tim Landscheidt
2012-03-27 17:48 ` Deniz Dogan
2012-03-27 18:04   ` Eli Zaretskii
2012-03-27 18:40 ` Stefan Monnier
2012-03-28  1:01   ` Miles Bader
2012-03-28 12:47     ` Stefan Monnier
2016-12-23  1:09 ` hector [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161223010936.GA2877@workstation \
    --to=hectorlahoz@gmail.com \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.