unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Add an option to not reduce vocabulary of the Japanese
@ 2022-06-03  3:16 Taiju HIGASHI
  2022-06-03  6:12 ` Eli Zaretskii
  2022-06-03 23:52 ` Richard Stallman
  0 siblings, 2 replies; 42+ messages in thread
From: Taiju HIGASHI @ 2022-06-03  3:16 UTC (permalink / raw)
  To: emacs-devel; +Cc: higashi

[-- Attachment #1: Type: text/plain, Size: 630 bytes --]

Hi,

The Japanese dictionary bundled with Emacs has a small vocabulary.

For example, to convert "なごや" to "名古屋" (Nagoya) in Kanji, I have to
enter "なご" and convert it to "名古", then enter "や" and convert it to
"屋", because the Japanese dictionary bundled with Emacs does not have
"名古屋".

The skkdic-convert function in the ja-dic-cnv package generates the
Japanese dictionary, but its logic includes a step that reduces the
dictionary's vocabulary.

So I have created a patch to add an option to skip this reduction
process. I would be happy to receive your review and feedback.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Add-an-option-to-not-reduce-vocabulary-of-the-Japane.patch --]
[-- Type: text/x-patch, Size: 6669 bytes --]

From 8afafacf87af38ef0fd3193d5133cf06de365c65 Mon Sep 17 00:00:00 2001
From: Taiju HIGASHI <higashi@taiju.info>
Date: Thu, 2 Jun 2022 23:24:13 +0900
Subject: [PATCH] Add an option to not reduce vocabulary of the Japanese
 dictionary.

* configure.ac: Add "with-ja-dic-reduction" configure argument.
* leim/Makefile.in: Add "JA_DIC_NO_REDUCTION_OPTION" variable.
* lisp/international/ja-dic-cnv.el (skkdic-convert-okuri-nasi): Add
"no-reduction" argument.
(skkdic-convert): Add "no-reduction" optional argument.
(batch-skkdic-convert): Add "--no-reduction" command line argument.
---
 configure.ac                     |  7 +++++++
 leim/Makefile.in                 |  4 +++-
 lisp/international/ja-dic-cnv.el | 26 ++++++++++++++++++--------
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/configure.ac b/configure.ac
index ed8ec890ac..e28715ad43 100644
--- a/configure.ac
+++ b/configure.ac
@@ -491,6 +491,7 @@ OPTION_DEFAULT_ON([threads],[don't compile with elisp threading support])
 OPTION_DEFAULT_OFF([native-compilation],[compile with Emacs Lisp native compiler support])
 OPTION_DEFAULT_OFF([cygwin32-native-compilation],[use native compilation on 32-bit Cygwin])
 OPTION_DEFAULT_ON([xinput2],[don't use version 2 of the X Input Extension for input])
+OPTION_DEFAULT_ON([ja-dic-reduction],[don't reduce the Japanese dictionary])
 
 AC_ARG_WITH([file-notification],[AS_HELP_STRING([--with-file-notification=LIB],
  [use a file notification library (LIB one of: yes, inotify, kqueue, gfile, w32, no)])],
@@ -6491,6 +6492,7 @@ AS_ECHO(["  Does Emacs use -lXaw3d?                                 ${HAVE_XAW3D
   Which dumping strategy does Emacs use?                  ${with_dumping}
   Does Emacs have native lisp compiler?                   ${HAVE_NATIVE_COMP}
   Does Emacs use version 2 of the the X Input Extension?  ${HAVE_XINPUT2}
+  Does Emacs reduce the Japanese dictionary?              ${with_ja_dic_reduction}
 "])
 
 if test -n "${EMACSDATA}"; then
@@ -6589,6 +6591,11 @@ SUBDIR_MAKEFILES_IN=`echo " ${SUBDIR_MAKEFILES}" | sed -e 's| | $(srcdir)/|g' -e
 
 AC_SUBST(SUBDIR_MAKEFILES_IN)
 
+if test "$with_ja_dic_reduction" = "no"; then
+  JA_DIC_NO_REDUCTION_OPTION=--no-reduction
+fi
+AC_SUBST([JA_DIC_NO_REDUCTION_OPTION])
+
 dnl You might wonder (I did) why epaths.h is generated by running make,
 dnl rather than just letting configure generate it from epaths.in.
 dnl One reason is that the various paths are not fully expanded (see above);
diff --git a/leim/Makefile.in b/leim/Makefile.in
index 3b4216c0b8..f1a476a035 100644
--- a/leim/Makefile.in
+++ b/leim/Makefile.in
@@ -32,6 +32,8 @@ leimdir = ${srcdir}/../lisp/leim
 
 EXEEXT = @EXEEXT@
 
+JA_DIC_NO_REDUCTION_OPTION = @JA_DIC_NO_REDUCTION_OPTION@
+
 -include ${top_builddir}/src/verbose.mk
 
 # Prevent any settings in the user environment causing problems.
@@ -134,7 +136,7 @@ generate-ja-dic: ${leimdir}/ja-dic/ja-dic.el
 ${leimdir}/ja-dic/ja-dic.el: $(srcdir)/SKK-DIC/SKK-JISYO.L
 	$(AM_V_GEN)$(RUN_EMACS) -batch -l ja-dic-cnv \
 	  --eval "(setq max-specpdl-size 5000)" \
-	  -f batch-skkdic-convert -dir "$(leimdir)/ja-dic" "$<"
+	  -f batch-skkdic-convert -dir "$(leimdir)/ja-dic" $(JA_DIC_NO_REDUCTION_OPTION) "$<"
 
 ${srcdir}/../lisp/language/pinyin.el: ${srcdir}/MISC-DIC/pinyin.map
 	$(AM_V_GEN)${RUN_EMACS} -l titdic-cnv -f pinyin-convert $< $@
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index 704f1a1ae6..7d3103fd8d 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -295,7 +295,7 @@
       (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries))
       (progress-reporter-done progress))))
 
-(defun skkdic-convert-okuri-nasi (skkbuf buf)
+(defun skkdic-convert-okuri-nasi (skkbuf buf no-reduction)
   (with-current-buffer buf
     (insert ";; Setting okuri-nasi entries.\n"
 	    "(skkdic-set-okuri-nasi\n")
@@ -311,7 +311,9 @@
           (setq count (1+ count))
           (progress-reporter-update progress count)
 	  (if (setq candidates
-		    (skkdic-reduced-candidates skkbuf kana candidates))
+		    (if no-reduction
+                        candidates
+                      (skkdic-reduced-candidates skkbuf kana candidates)))
 	      (progn
 		(insert "\"" kana)
 		(while candidates
@@ -322,10 +324,12 @@
       (progress-reporter-done progress))
     (insert ")\n\n")))
 
-(defun skkdic-convert (filename &optional dirname)
+(defun skkdic-convert (filename &optional dirname no-reduction)
   "Generate Emacs Lisp file from Japanese dictionary file FILENAME.
 The format of the dictionary file should be the same as SKK dictionaries.
-Saves the output as `ja-dic-filename', in directory DIRNAME (if specified)."
+Saves the output as `ja-dic-filename', in directory DIRNAME (if specified).
+When NO-REDUCTION is t, then the dictionary is not reduced.
+"
   (interactive "FSKK dictionary file: ")
   (let* ((skkbuf (get-buffer-create " *skkdic-unannotated*"))
 	 (buf (get-buffer-create "*skkdic-work*")))
@@ -389,7 +393,7 @@ Saves the output as `ja-dic-filename', in directory DIRNAME (if specified)."
 	(skkdic-collect-okuri-nasi)
 
 	;; Convert okuri-nasi general entries.
-	(skkdic-convert-okuri-nasi skkbuf buf)
+	(skkdic-convert-okuri-nasi skkbuf buf no-reduction)
 
 	;; Postfix
 	(with-current-buffer buf
@@ -427,15 +431,21 @@ To get complete usage, invoke:
 	(message "To convert SKK-JISYO.L into skkdic.el:")
 	(message "  %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert SKK-JISYO.L")
 	(message "To convert SKK-JISYO.L into DIR/ja-dic.el:")
-	(message "  %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert -dir DIR SKK-JISYO.L"))
-    (let (targetdir filename)
+	(message "  %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert -dir DIR SKK-JISYO.L")
+        (message "To convert SKK-JISYO.L into skkdic.el without reduction:")
+        (message "  %% emacs -batch -l ja-dic-cnv -f batch-skkdic-convert SKK-JISYO.L --no-reduction"))
+    (let (targetdir filename no-reduction)
       (if (string= (car command-line-args-left) "-dir")
 	  (progn
 	    (setq command-line-args-left (cdr command-line-args-left))
 	    (setq targetdir (expand-file-name (car command-line-args-left)))
 	    (setq command-line-args-left (cdr command-line-args-left))))
+      (if (string= (car command-line-args-left) "--no-reduction")
+          (progn
+	    (setq no-reduction t)
+	    (setq command-line-args-left (cdr command-line-args-left))))
       (setq filename (expand-file-name (car command-line-args-left)))
-      (skkdic-convert filename targetdir)))
+      (skkdic-convert filename targetdir no-reduction)))
   (kill-emacs 0))
 
 
-- 
2.36.1


[-- Attachment #3: Type: text/plain, Size: 2020 bytes --]


By the way, if I may be honest, I would like to remove this reduction
process.

"名古屋" (Nagoya) [0] is the name of one of Japan's major cities and is a
proper noun.

I don't think most people, myself included, recognize that the word is a
composite of "名古" and "屋".

I am Japanese, so my sense may be different, but I recognize "New York"
as one word and "Spider-man" as one word.
In other words, instead of converting "名古" and "屋" separately, we
want to convert "名古屋" as it is. It is stressful to have to separate
the words I imagine in my head from the words I use in Kanji
conversion. I would like to reduce how often that happens, at least a
little.

Although the skkdic-reduced-candidates function mechanically eliminates
words that can be entered by combining them with other words, it does
not judge the importance of words, so even frequently used words like
"名古屋" are eliminated. That is very inconvenient.

My concern is that Emacs' standard Kanji conversion engine will be
regarded as useless.
Despite being based on a dictionary with a sufficient vocabulary
(SKK-JISYO.L), it generates an inconvenient dictionary by the reduction
process.
Most of the people who rated Emacs' standard kanji conversion engine as
useless are probably unaware of this fact.
I also rated the standard Emacs kanji conversion engine as useless,
because I did not know that fact.
However, when I learned the facts, I realized that this was a
misunderstanding and that I had been disrespectful toward Emacs.
This bad reputation is simply the result of a misunderstanding.

The reduction of dictionaries would reduce the file size by less than
half. While significant, how important is this in today's computing
environment?
In my personal opinion, I feel that reducing the vocabulary of the
dictionary has more disadvantages than advantages.

My English is not very good, so I apologize if I did not convey my
intentions.

[0]: https://en.wikipedia.org/wiki/Nagoya

Best Regards,
-- 
Taiju

^ permalink raw reply related	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2022-06-10 13:50 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-03  3:16 [PATCH] Add an option to not reduce vocabulary of the Japanese Taiju HIGASHI
2022-06-03  6:12 ` Eli Zaretskii
2022-06-03  6:43   ` Taiju HIGASHI
2022-06-03 11:10     ` Eli Zaretskii
     [not found]       ` <87sfolwyzj.fsf@taiju.info>
2022-06-04  8:38         ` Eli Zaretskii
2022-06-04 11:46           ` Taiju HIGASHI
2022-06-04 13:43             ` Eli Zaretskii
2022-06-04 16:39               ` Taiju HIGASHI
2022-06-04 16:47                 ` Eli Zaretskii
2022-06-04 17:01                   ` Taiju HIGASHI
2022-06-04 17:03                     ` Eli Zaretskii
2022-06-05  3:05                     ` handa
2022-06-05 14:07                       ` Taiju HIGASHI
2022-06-06 11:52                         ` handa
2022-06-06 12:53                           ` Taiju HIGASHI
2022-06-06 14:14                             ` Lars Ingebrigtsen
2022-06-06 14:17                               ` Eli Zaretskii
2022-06-06 15:08                                 ` Taiju HIGASHI
2022-06-06 16:05                                   ` Eli Zaretskii
2022-06-07  0:47                                     ` Taiju HIGASHI
2022-06-07  1:06                                       ` Taiju HIGASHI
2022-06-07  3:50                                         ` Taiju HIGASHI
2022-06-07 10:58                                           ` Eli Zaretskii
2022-06-07  9:36                                         ` Lars Ingebrigtsen
2022-06-07 10:10                                           ` Taiju HIGASHI
2022-06-07 10:22                                             ` Lars Ingebrigtsen
2022-06-07 10:48                                       ` Eli Zaretskii
2022-06-07 12:12                                         ` Taiju HIGASHI
2022-06-07 12:41                                         ` Taiju HIGASHI
2022-06-07 13:08                                           ` Taiju HIGASHI
2022-06-09 13:10                                             ` Taiju HIGASHI
2022-06-09 13:14                                               ` Eli Zaretskii
2022-06-10 13:15                                             ` Eli Zaretskii
2022-06-10 13:50                                               ` Taiju HIGASHI
2022-06-03 23:51     ` Richard Stallman
2022-06-04 10:57       ` Taiju HIGASHI
2022-06-04 11:19         ` Taiju HIGASHI
2022-06-05 22:53         ` Richard Stallman
2022-06-06  0:05           ` Taiju HIGASHI
2022-06-03 23:52 ` Richard Stallman
2022-06-04  6:25   ` Eli Zaretskii
2022-06-04 12:36     ` Taiju HIGASHI

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).