From: Eli Zaretskii <eliz@gnu.org>
To: dmoncayo@gmail.com, lekktu@gmail.com
Cc: 12055@debbugs.gnu.org
Subject: bug#12055: Re: bug#12055: 24.1.50; Characters "á" and "é" are not correctly displayed on a Windows terminal
Date: Sat, 28 Jul 2012 13:06:30 +0300 [thread overview]
Message-ID: <83a9yki4ih.fsf@gnu.org> (raw)
In-Reply-To: <83d33gia5u.fsf@gnu.org>
> Date: Sat, 28 Jul 2012 11:04:29 +0300
> From: Eli Zaretskii <eliz@gnu.org>
> Cc: lekktu@gmail.com, 12055@debbugs.gnu.org
>
> > Date: Sat, 28 Jul 2012 03:12:12 +0200
> > From: Dani Moncayo <dmoncayo@gmail.com>
> > Cc: lekktu@gmail.com, 12055@debbugs.gnu.org
> >
> > > Please
> > > post here the exact output, and please tell for each pair of such
> > > messages which character did you type.
> >
> > Sorry for the delay. I've not had time until now.
> >
> > Here is my data:
>
> Thanks to both of you. Now I see that my theory is correct, and I can
> sit down and code the solution for this problem.
Please try the patch below. It works for me.
Please try it also when Unicode input is not used (it is used by default
on Windows NT and later, as a result of this patch). You can do that by
forcing w32_console_unicode_input to zero (either by modifying the
source of w32console.c and rebuilding, or by setting the variable's
value in GDB).
TIA
=== modified file 'lisp/international/mule-cmds.el'
--- lisp/international/mule-cmds.el 2012-07-25 23:11:23 +0000
+++ lisp/international/mule-cmds.el 2012-07-28 09:43:40 +0000
@@ -2655,23 +2655,29 @@ See also `locale-charset-language-names'
;; On Windows, override locale-coding-system,
;; default-file-name-coding-system, keyboard-coding-system,
- ;; terminal-coding-system with system codepage.
+ ;; terminal-coding-system with the appropriate codepages.
(when (boundp 'w32-ansi-code-page)
- (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
- (when (coding-system-p code-page-coding)
- (unless frame (setq locale-coding-system code-page-coding))
- (set-keyboard-coding-system code-page-coding frame)
- (set-terminal-coding-system code-page-coding frame)
- ;; Set default-file-name-coding-system last, so that Emacs
- ;; doesn't try to use cpNNNN when it defines keyboard and
- ;; terminal encoding. That's because the above two lines
- ;; will want to load code-pages.el, where cpNNNN are
- ;; defined; if default-file-name-coding-system were set to
- ;; cpNNNN while these two lines run, Emacs will want to use
- ;; it for encoding the file name it wants to load. And that
- ;; will fail, since cpNNNN is not yet usable until
- ;; code-pages.el finishes loading.
- (setq default-file-name-coding-system code-page-coding))))
+ (let ((ansi-code-page-coding (intern (format "cp%d" w32-ansi-code-page)))
+ (oem-code-page-coding
+ (intern (format "cp%d" (w32-get-console-codepage))))
+ ansi-cs-p oem-cs-p)
+ (and (coding-system-p ansi-code-page-coding)
+ (setq ansi-cs-p t))
+ (and (coding-system-p oem-code-page-coding)
+ (setq oem-cs-p t))
+ ;; Set the keyboard and display encoding to either the current
+ ;; ANSI codepage or the OEM codepage, depending on whether
+ ;; this is a GUI or a TTY frame.
+ (when ansi-cs-p
+ (unless frame (setq locale-coding-system ansi-code-page-coding))
+ (when (display-graphic-p frame)
+ (set-keyboard-coding-system ansi-code-page-coding frame)
+ (set-terminal-coding-system ansi-code-page-coding frame))
+ (setq default-file-name-coding-system ansi-code-page-coding))
+ (when oem-cs-p
+ (unless (display-graphic-p frame)
+ (set-keyboard-coding-system oem-code-page-coding frame)
+ (set-terminal-coding-system oem-code-page-coding frame)))))
(when (eq system-type 'darwin)
;; On Darwin, file names are always encoded in utf-8, no matter
=== modified file 'src/w32console.c'
--- src/w32console.c 2012-06-28 07:50:27 +0000
+++ src/w32console.c 2012-07-28 09:48:41 +0000
@@ -37,6 +37,7 @@ along with GNU Emacs. If not, see <http
#include "termhooks.h"
#include "termchar.h"
#include "dispextern.h"
+#include "w32heap.h" /* for os_subtype */
#include "w32inevt.h"
/* from window.c */
@@ -67,6 +68,7 @@ static CONSOLE_CURSOR_INFO prev_console_
#endif
HANDLE keyboard_handle;
+int w32_console_unicode_input;
/* Setting this as the ctrl handler prevents emacs from being killed when
@@ -786,6 +788,11 @@ initialize_w32_display (struct terminal
info.srWindow.Left);
}
+ if (os_subtype == OS_NT)
+ w32_console_unicode_input = 1;
+ else
+ w32_console_unicode_input = 0;
+
/* Setup w32_display_info structure for this frame. */
w32_initialize_display_info (build_string ("Console"));
=== modified file 'src/w32inevt.c'
--- src/w32inevt.c 2012-05-26 11:58:19 +0000
+++ src/w32inevt.c 2012-07-28 09:57:11 +0000
@@ -41,6 +41,7 @@ along with GNU Emacs. If not, see <http
#include "termchar.h"
#include "w32heap.h"
#include "w32term.h"
+#include "w32inevt.h"
/* stdin, from w32console.c */
extern HANDLE keyboard_handle;
@@ -61,6 +62,15 @@ static INPUT_RECORD *queue_ptr = event_q
/* Temporarily store lead byte of DBCS input sequences. */
static char dbcs_lead = 0;
+static inline BOOL
+w32_read_console_input (HANDLE h, INPUT_RECORD *rec, DWORD recsize,
+ DWORD *waiting)
+{
+ return (w32_console_unicode_input
+ ? ReadConsoleInputW (h, rec, recsize, waiting)
+ : ReadConsoleInputA (h, rec, recsize, waiting));
+}
+
static int
fill_queue (BOOL block)
{
@@ -80,8 +90,8 @@ fill_queue (BOOL block)
return 0;
}
- rc = ReadConsoleInput (keyboard_handle, event_queue, EVENT_QUEUE_SIZE,
- &events_waiting);
+ rc = w32_read_console_input (keyboard_handle, event_queue, EVENT_QUEUE_SIZE,
+ &events_waiting);
if (!rc)
return -1;
queue_ptr = event_queue;
@@ -224,7 +234,7 @@ w32_kbd_patch_key (KEY_EVENT_RECORD *eve
#endif
/* On NT, call ToUnicode instead and then convert to the current
- locale's default codepage. */
+ console input codepage. */
if (os_subtype == OS_NT)
{
WCHAR buf[128];
@@ -233,14 +243,9 @@ w32_kbd_patch_key (KEY_EVENT_RECORD *eve
keystate, buf, 128, 0);
if (isdead > 0)
{
- char cp[20];
- int cpId;
+ int cpId = GetConsoleCP ();
event->uChar.UnicodeChar = buf[isdead - 1];
-
- GetLocaleInfo (GetThreadLocale (),
- LOCALE_IDEFAULTANSICODEPAGE, cp, 20);
- cpId = atoi (cp);
isdead = WideCharToMultiByte (cpId, 0, buf, isdead,
ansi_code, 4, NULL, NULL);
}
@@ -447,26 +452,34 @@ key_event (KEY_EVENT_RECORD *event, stru
}
else if (event->uChar.AsciiChar > 0)
{
+ /* Pure ASCII characters < 128. */
emacs_ev->kind = ASCII_KEYSTROKE_EVENT;
emacs_ev->code = event->uChar.AsciiChar;
}
- else if (event->uChar.UnicodeChar > 0)
+ else if (event->uChar.UnicodeChar > 0
+ && w32_console_unicode_input)
{
+ /* Unicode codepoint; only valid if we are using Unicode
+ console input mode. */
emacs_ev->kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
emacs_ev->code = event->uChar.UnicodeChar;
}
else
{
- /* Fallback for non-Unicode versions of Windows. */
+ /* Fallback handling of non-ASCII characters for non-Unicode
+ versions of Windows, and for non-Unicode input on NT
+ family of Windows. Only characters in the current
+ console codepage are supported by this fallback. */
wchar_t code;
char dbcs[2];
- char cp[20];
int cpId;
- /* Get the codepage to interpret this key with. */
- GetLocaleInfo (GetThreadLocale (),
- LOCALE_IDEFAULTANSICODEPAGE, cp, 20);
- cpId = atoi (cp);
+ /* Get the current console input codepage to interpret this
+ key with. Note that the system defaults for the OEM
+ codepage could have been changed by calling SetConsoleCP
+ or w32-set-console-codepage, so using GetLocaleInfo to
+ get LOCALE_IDEFAULTCODEPAGE is not TRT here. */
+ cpId = GetConsoleCP ();
dbcs[0] = dbcs_lead;
dbcs[1] = event->uChar.AsciiChar;
@@ -501,6 +514,7 @@ key_event (KEY_EVENT_RECORD *event, stru
}
else
{
+ /* Function keys and other non-character keys. */
emacs_ev->kind = NON_ASCII_KEYSTROKE_EVENT;
emacs_ev->code = event->wVirtualKeyCode;
}
=== modified file 'src/w32inevt.h'
--- src/w32inevt.h 2012-01-19 07:21:25 +0000
+++ src/w32inevt.h 2012-07-28 08:39:49 +0000
@@ -19,6 +19,8 @@ along with GNU Emacs. If not, see <http
#ifndef EMACS_W32INEVT_H
#define EMACS_W32INEVT_H
+extern int w32_console_unicode_input;
+
extern int w32_console_read_socket (struct terminal *term, int numchars,
struct input_event *hold_quit);
extern void w32_console_mouse_position (FRAME_PTR *f, int insist,
next prev parent reply other threads:[~2012-07-28 10:06 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-07-26 12:13 bug#12055: 24.1.50; Characters "á" and "é" are not correctly displayed on a Windows terminal Dani Moncayo
2012-07-26 16:13 ` Eli Zaretskii
2012-07-26 16:24 ` Juanma Barranquero
2012-07-26 16:42 ` bug#12055: " Eli Zaretskii
2012-07-26 16:49 ` Juanma Barranquero
2012-07-26 17:18 ` bug#12055: " Eli Zaretskii
2012-07-26 18:09 ` Eli Zaretskii
2012-07-26 18:42 ` Juanma Barranquero
2012-07-26 18:29 ` Juanma Barranquero
2012-07-26 20:03 ` bug#12055: " Eli Zaretskii
2012-07-26 22:40 ` Dani Moncayo
2012-07-27 6:45 ` bug#12055: " Eli Zaretskii
2012-07-27 8:35 ` Dani Moncayo
2012-07-27 9:04 ` bug#12055: " Eli Zaretskii
2012-07-27 15:12 ` Eli Zaretskii
2012-07-27 16:46 ` Jason Rumney
2012-07-27 18:03 ` Eli Zaretskii
2012-07-27 18:22 ` Eli Zaretskii
2012-07-27 23:45 ` Juanma Barranquero
2012-07-28 1:12 ` Dani Moncayo
2012-07-28 8:04 ` bug#12055: " Eli Zaretskii
2012-07-28 10:06 ` Eli Zaretskii [this message]
2012-07-28 11:55 ` Dani Moncayo
2012-07-28 12:23 ` bug#12055: " Eli Zaretskii
2012-07-28 12:49 ` Dani Moncayo
2012-07-28 15:02 ` bug#12055: " Eli Zaretskii
2012-07-28 12:30 ` bug#12055: " Eli Zaretskii
2012-07-28 13:57 ` Dani Moncayo
2012-07-28 16:07 ` Juanma Barranquero
2012-07-28 16:12 ` Dani Moncayo
2012-07-28 16:11 ` Juanma Barranquero
2012-07-28 16:44 ` bug#12055: " Eli Zaretskii
2012-07-28 17:01 ` Eli Zaretskii
2012-07-26 16:44 ` Dani Moncayo
2012-07-28 14:12 ` Dani Moncayo
2012-07-28 15:01 ` bug#12055: " Eli Zaretskii
2012-07-28 15:23 ` Dani Moncayo
2012-07-28 15:34 ` Dani Moncayo
2012-07-28 16:27 ` bug#12055: " Eli Zaretskii
2012-07-28 15:35 ` Eli Zaretskii
2012-07-28 15:46 ` Dani Moncayo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/emacs/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=83a9yki4ih.fsf@gnu.org \
--to=eliz@gnu.org \
--cc=12055@debbugs.gnu.org \
--cc=dmoncayo@gmail.com \
--cc=lekktu@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.savannah.gnu.org/cgit/emacs.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).