all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: npostavs@users.sourceforge.net
To: Eli Zaretskii <eliz@gnu.org>
Cc: 25288@debbugs.gnu.org, fvamail@gmail.com
Subject: bug#25288: 25.1; term, ansi-term, broken output of utf8 text
Date: Wed, 28 Dec 2016 21:37:19 -0500	[thread overview]
Message-ID: <87inq38nq8.fsf@users.sourceforge.net> (raw)
In-Reply-To: <83h95nvojh.fsf@gnu.org> (Eli Zaretskii's message of "Wed, 28 Dec 2016 21:31:14 +0200")

[-- Attachment #1: Type: text/plain, Size: 727 bytes --]

tags 25288 patch
quit

Eli Zaretskii <eliz@gnu.org> writes:

>> From: npostavs@users.sourceforge.net
>> Date: Wed, 28 Dec 2016 14:10:30 -0500
>> Cc: 25288@debbugs.gnu.org
>> 
>> Is there a way to recognize incomplete decoding from lisp?  I can't see
>> any.
>
> If you know the encoding of the byte stream (and term.el must, since
> it evidently decodes it later on), then you could probably use
> char-charset, after decoding: if you get 'eight-bit, then you've got
> an incomplete byte sequence.  But I didn't try that.

That should work at least for encodings like UTF-8, for which undecoded
bytes are not ASCII.  I guess parsing of escape codes would only work on
such encodings anyway, so it should be fine.  Patch attached.


[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 4666 bytes --]

From 6b052065c60406df5b4cd54f698f78594a010922 Mon Sep 17 00:00:00 2001
From: Noam Postavsky <npostavs@gmail.com>
Date: Wed, 28 Dec 2016 20:13:20 -0500
Subject: [PATCH v1] Handle multibyte chars spanning chunks in term.el

* lisp/term.el (term-terminal-undecoded-bytes): New variable.
(term-mode): Make it buffer local.  Don't make `term-terminal-parameter'
buffer-local twice.
(term-emulate-terminal): Check for bytes of incompletely decoded
characters, and save them until the next call when they can be fully
decoded (Bug#25288).
---
 lisp/term.el | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/lisp/term.el b/lisp/term.el
index d3d6390..696e39f 100644
--- a/lisp/term.el
+++ b/lisp/term.el
@@ -341,6 +341,7 @@
 (defconst term-protocol-version "0.96")
 
 (eval-when-compile (require 'ange-ftp))
+(eval-when-compile (require 'cl-lib))
 (require 'ring)
 (require 'ehelp)
 
@@ -404,6 +405,7 @@ term-terminal-state
 (defvar term-kill-echo-list nil
   "A queue of strings whose echo we want suppressed.")
 (defvar term-terminal-parameter)
+(defvar term-terminal-undecoded-bytes nil)
 (defvar term-terminal-previous-parameter)
 (defvar term-current-face 'term)
 (defvar term-scroll-start 0 "Top-most line (inclusive) of scrolling region.")
@@ -1015,7 +1017,6 @@ term-mode
 
   ;; These local variables are set to their local values:
   (make-local-variable 'term-saved-home-marker)
-  (make-local-variable 'term-terminal-parameter)
   (make-local-variable 'term-saved-cursor)
   (make-local-variable 'term-prompt-regexp)
   (make-local-variable 'term-input-ring-size)
@@ -1052,6 +1053,7 @@ term-mode
   (make-local-variable 'term-ansi-current-invisible)
 
   (make-local-variable 'term-terminal-parameter)
+  (make-local-variable 'term-terminal-undecoded-bytes)
   (make-local-variable 'term-terminal-previous-parameter)
   (make-local-variable 'term-terminal-previous-parameter-2)
   (make-local-variable 'term-terminal-previous-parameter-3)
@@ -2748,6 +2750,10 @@ term-emulate-terminal
 
 	  (when term-log-buffer
 	    (princ str term-log-buffer))
+          (when term-terminal-undecoded-bytes
+            (setq str (concat term-terminal-undecoded-bytes str))
+            (setq str-length (length str))
+            (setq term-terminal-undecoded-bytes nil))
 	  (cond ((eq term-terminal-state 4) ;; Have saved pending output.
 		 (setq str (concat term-terminal-parameter str))
 		 (setq term-terminal-parameter nil)
@@ -2763,13 +2769,6 @@ term-emulate-terminal
 				       str i))
 		   (when (not funny) (setq funny str-length))
 		   (cond ((> funny i)
-			  ;; Decode the string before counting
-			  ;; characters, to avoid garbling of certain
-			  ;; multibyte characters (bug#1006).
-			  (setq decoded-substring
-				(decode-coding-string
-				 (substring str i funny)
-				 locale-coding-system))
 			  (cond ((eq term-terminal-state 1)
 				 ;; We are in state 1, we need to wrap
 				 ;; around.  Go to the beginning of
@@ -2778,7 +2777,31 @@ term-emulate-terminal
 				 (term-down 1 t)
 				 (term-move-columns (- (term-current-column)))
 				 (setq term-terminal-state 0)))
+			  ;; Decode the string before counting
+			  ;; characters, to avoid garbling of certain
+			  ;; multibyte characters (bug#1006).
+			  (setq decoded-substring
+				(decode-coding-string
+				 (substring str i funny)
+				 locale-coding-system))
 			  (setq count (length decoded-substring))
+                          ;; Check for a multibyte character whose bytes
+                          ;; are cut off at the end of the string, and
+                          ;; save those bytes for the next call.
+                          (when (= funny str-length)
+                            (let ((partial 0))
+                              (while (eq (char-charset (aref decoded-substring
+                                                             (- count 1 partial)))
+                                         'eight-bit)
+                                (cl-incf partial))
+                              (when (> partial 0)
+                                (setq term-terminal-undecoded-bytes
+                                      (substring decoded-substring (- partial)))
+                                (setq decoded-substring
+                                      (substring decoded-substring 0 (- partial)))
+                                (cl-decf str-length partial)
+                                (cl-decf count partial)
+                                (cl-decf funny partial))))
 			  (setq temp (- (+ (term-horizontal-column) count)
 					term-width))
 			  (cond ((or term-suppress-hard-newline (<= temp 0)))
-- 
2.9.3


  reply	other threads:[~2016-12-29  2:37 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-28 10:41 bug#25288: 25.1; term, ansi-term, broken output of utf8 text Vjacheslav
2016-12-28 19:10 ` npostavs
2016-12-28 19:31   ` Eli Zaretskii
2016-12-29  2:37     ` npostavs [this message]
2016-12-29 16:06       ` Eli Zaretskii
2017-01-03 14:05         ` npostavs

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87inq38nq8.fsf@users.sourceforge.net \
    --to=npostavs@users.sourceforge.net \
    --cc=25288@debbugs.gnu.org \
    --cc=eliz@gnu.org \
    --cc=fvamail@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.