unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Max Mikhanosha <max.mikhanosha@protonmail.com>
To: "emacs-devel@gnu.org" <emacs-devel@gnu.org>
Subject: Bugfix for utf-8 XTerm/MinTTY and (set-input-meta-mode t)
Date: Tue, 01 Jun 2021 16:19:40 +0000	[thread overview]
Message-ID: <C-hzVz5np2upcH8IBAZRHpYkg10OuQmyYQKucOL6ntbxtL205LtQQFXNfcipmcoavl0VReVkujYEqTCWXfLZiihuaOyEUZIPGU2ciyUYHqA=@protonmail.com> (raw)

Emacs incorrectly handles (set-input-meta-mode t) (meta carried in the 8th bit of input) when the terminal is in UTF-8 mode.

Both XTerm and MinTTY, when configured to send the meta modifier as the 8th bit while in UTF-8 mode, first set the 8th bit and then encode the resulting character as UTF-8. For example, Meta-x is sent as ?x + 128 = 120 + 128 = 248 (code point U+00F8), which is UTF-8 encoded as the two bytes 0xc3 0xb8.

But Emacs handles the meta modifier in the 8th bit in tty_read_avail_input, before decoding the raw keyboard input.

So it erroneously treats the 0xc3 0xb8 input as two ordinary ASCII characters with the meta modifier set, stripping the 8th bit from each byte and garbling the input (0xc3 becomes M-C and 0xb8 becomes M-8 instead of a single M-x).

This problem has existed for a long time and has frustrated at least a few hundred people, as can be seen from the view count on the Stack Overflow question that comes up when googling "emacs utf8 xterm".

The patch below fixes this bug by making 8th-bit meta key handling work correctly in UTF-8 mode.

I have tested it with xterm and mintty: meta keys and meta-control keys now work correctly regardless of whether the terminal is in UTF-8 mode.

The diff against the emacs-26 branch is pasted below.

diff --git a/src/coding.c b/src/coding.c
index 078c1c4e6a..743fceb32c 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -5989,6 +5989,11 @@ raw_text_coding_system_p (struct coding_system *coding)
 	  && coding->encoder == encode_coding_raw_text) ? true : false;
 }

+bool utf_8_input_coding_system_p(struct coding_system *coding)
+{
+  return (coding->decoder == decode_coding_utf_8) ? true : false;
+}
+

 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
    the subsidiary that has the same eol-spec as PARENT (if it is not
diff --git a/src/coding.h b/src/coding.h
index aab8c2d438..6124330a1f 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -702,6 +702,7 @@ extern Lisp_Object encode_file_name (Lisp_Object);
 extern Lisp_Object decode_file_name (Lisp_Object);
 extern Lisp_Object raw_text_coding_system (Lisp_Object);
 extern bool raw_text_coding_system_p (struct coding_system *);
+extern bool utf_8_input_coding_system_p (struct coding_system *);
 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
 extern Lisp_Object complement_process_encoding_system (Lisp_Object);

diff --git a/src/keyboard.c b/src/keyboard.c
index aa3448439b..84acf4a998 100644
--- a/src/keyboard.c
+++ b/src/keyboard.c
@@ -2235,14 +2235,16 @@ read_decoded_event_from_main_queue (struct timespec *end_time,
 	return nextevt;		/* No decoding needed.  */
       else
 	{
+	  struct coding_system *coding = TERMINAL_KEYBOARD_CODING (terminal);
+	  bool utf8_input_terminal = utf_8_input_coding_system_p (coding);
 	  int meta_key = terminal->display_info.tty->meta_key;
+
 	  eassert (n < MAX_ENCODED_BYTES);
 	  events[n++] = nextevt;
+
 	  if (NATNUMP (nextevt)
-	      && XINT (nextevt) < (meta_key == 1 ? 0x80 : 0x100))
+	      && XINT (nextevt) < ((meta_key == 1 && !utf8_input_terminal) ? 0x80 : 0x100))
 	    { /* An encoded byte sequence, let's try to decode it.  */
-	      struct coding_system *coding
-		= TERMINAL_KEYBOARD_CODING (terminal);

 	      if (raw_text_coding_system_p (coding))
 		{
@@ -2253,12 +2255,13 @@ read_decoded_event_from_main_queue (struct timespec *end_time,
 		}
 	      else
 		{
+
 		  unsigned char src[MAX_ENCODED_BYTES];
 		  unsigned char dest[MAX_ENCODED_BYTES * MAX_MULTIBYTE_LENGTH];
 		  int i;
 		  for (i = 0; i < n; i++)
 		    src[i] = XINT (events[i]);
-		  if (meta_key != 2)
+		  if (!utf8_input_terminal && meta_key != 2)
 		    for (i = 0; i < n; i++)
 		      src[i] &= ~0x80;
 		  coding->destination = dest;
@@ -2275,8 +2278,21 @@ read_decoded_event_from_main_queue (struct timespec *end_time,
 		      const unsigned char *p = coding->destination;
 		      eassert (coding->carryover_bytes == 0);
 		      n = 0;
-		      while (n < coding->produced_char)
-			events[n++] = make_number (STRING_CHAR_ADVANCE (p));
+                      while (n < coding->produced_char)
+                        {
+                          int c = STRING_CHAR_ADVANCE (p);
+			  if (utf8_input_terminal)
+			    {
+			      /* put meta modifier on the key */
+			      int modifier = 0;
+			      if (meta_key == 1 && c < 0x100 && (c & 0x80))
+				modifier = meta_modifier;
+			      if (meta_key != 2)
+				c &= ~0x80;
+			      c |= modifier;
+			    }
+			  events[n++] = make_number (c);
+                        }
 		    }
 		}
 	    }
@@ -7118,16 +7134,31 @@ tty_read_avail_input (struct terminal *terminal,
 #endif /* not MSDOS */
 #endif /* not WINDOWSNT */

+  bool utf8_input_terminal = utf_8_input_coding_system_p (TERMINAL_KEYBOARD_CODING(terminal));
+
   for (i = 0; i < nread; i++)
     {
       struct input_event buf;
       EVENT_INIT (buf);
       buf.kind = ASCII_KEYSTROKE_EVENT;
       buf.modifiers = 0;
-      if (tty->meta_key == 1 && (cbuf[i] & 0x80))
-        buf.modifiers = meta_modifier;
-      if (tty->meta_key != 2)
-        cbuf[i] &= ~0x80;
+
+      /* Both XTerm and MinTTY in utf8:true + MetaSendEscape:false mode
+         send Meta + ASCII letters by first adding 0x80, and then UTF-8
+         encoding the result.
+
+         Therefore trying to detect 0x80 meta key flag now not only
+         confuses meta key with UTF-8 encoding, but also loses
+         information by stripping the 8th bit from UTF-8 input before
+         decoding
+      */
+      if (!utf8_input_terminal)
+	{
+	  if (tty->meta_key == 1 && (cbuf[i] & 0x80))
+	    buf.modifiers = meta_modifier;
+	  if (tty->meta_key != 2)
+	    cbuf[i] &= ~0x80;
+	}

       buf.code = cbuf[i];
       /* Set the frame corresponding to the active tty.  Note that the





             reply	other threads:[~2021-06-01 16:19 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-01 16:19 Max Mikhanosha [this message]
2021-06-01 16:51 ` Bugfix for utf-8 XTerm/MinTTY and (set-input-meta-mode t) Eli Zaretskii
2021-06-01 17:28   ` Max Mikhanosha
2021-06-01 17:38     ` Eli Zaretskii
2021-06-01 18:01       ` Max Mikhanosha
2021-06-01 18:18         ` Eli Zaretskii
2021-06-01 18:35           ` Max Mikhanosha
2021-06-01 18:46             ` Eli Zaretskii
2021-06-02  9:22               ` Max Mikhanosha
2021-06-02 12:16                 ` Andreas Schwab
2021-06-03  5:42                   ` Max Mikhanosha
2021-06-05 14:20                     ` Eli Zaretskii
2021-06-01 17:29   ` Eli Zaretskii
2021-06-01 17:45     ` Max Mikhanosha
2021-06-01 17:52       ` Eli Zaretskii
2021-06-01 18:10         ` Max Mikhanosha
2021-06-01 17:04 ` Andreas Schwab
2021-06-01 17:36   ` Max Mikhanosha
2021-06-01 20:06 ` Stefan Monnier
2021-06-02 10:21   ` Max Mikhanosha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='C-hzVz5np2upcH8IBAZRHpYkg10OuQmyYQKucOL6ntbxtL205LtQQFXNfcipmcoavl0VReVkujYEqTCWXfLZiihuaOyEUZIPGU2ciyUYHqA=@protonmail.com' \
    --to=max.mikhanosha@protonmail.com \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).