unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Stefan Monnier <monnier@iro.umontreal.ca>
To: Eli Zaretskii <eliz@gnu.org>
Cc: 36431@debbugs.gnu.org
Subject: bug#36431: Crash in marker.c:337
Date: Wed, 03 Jul 2019 00:21:54 -0400	[thread overview]
Message-ID: <jwv7e8zkau5.fsf-monnier+emacs@gnu.org> (raw)
In-Reply-To: <83ftnrf87e.fsf@gnu.org> (Eli Zaretskii's message of "Sun, 30 Jun 2019 17:39:49 +0300")

> AFAICT, this patch moves the call to move_gap_both from a fragment
> where we must decode the inserted text to a fragment where such a
> decoding might not be necessary.  If I'm right, then this makes
> insert-file-contents slower in some cases, because moving the gap
> might be very expensive with large buffers.

Here's an alternative patch which doesn't suffer from this problem but
also eliminates the transiently-inconsistent multibyte buffer situation.


        Stefan


diff --git a/src/fileio.c b/src/fileio.c
index 2825c1b54c..9ed1fcf8ca 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -3705,6 +3705,7 @@ because (1) it preserves some marker positions and (2) it puts less data
 	  CHECK_CODING_SYSTEM (Vcoding_system_for_read);
 	  Fset (Qbuffer_file_coding_system, Vcoding_system_for_read);
 	}
+      eassert (inserted == 0);
       goto notfound;
     }
 
@@ -3731,7 +3732,10 @@ because (1) it preserves some marker positions and (2) it puts less data
       not_regular = 1;
 
       if (! NILP (visit))
-	goto notfound;
+        {
+          eassert (inserted == 0);
+	  goto notfound;
+        }
 
       if (! NILP (replace) || ! NILP (beg) || ! NILP (end))
 	xsignal2 (Qfile_error,
@@ -4399,10 +4403,10 @@ because (1) it preserves some marker positions and (2) it puts less data
   if (how_much < 0)
     report_file_error ("Read error", orig_filename);
 
-  /* Make the text read part of the buffer.  */
-  insert_from_gap_1 (inserted, inserted, false);
-
- notfound:
+ notfound: ;
+  Lisp_Object multibyte
+    = BVAR (current_buffer, enable_multibyte_characters);
+  bool ingap = true; /* Bytes are currently in the gap.  */
 
   if (NILP (coding_system))
     {
@@ -4411,6 +4415,7 @@ because (1) it preserves some marker positions and (2) it puts less data
 
 	 Note that we can get here only if the buffer was empty
 	 before the insertion.  */
+      eassert (Z == BEG);
 
       if (!NILP (Vcoding_system_for_read))
 	coding_system = Vcoding_system_for_read;
@@ -4421,8 +4426,6 @@ because (1) it preserves some marker positions and (2) it puts less data
 	     enable-multibyte-characters directly here without taking
 	     care of marker adjustment.  By this way, we can run Lisp
 	     program safely before decoding the inserted text.  */
-          Lisp_Object multibyte
-            = BVAR (current_buffer, enable_multibyte_characters);
           Lisp_Object undo_list = BVAR (current_buffer, undo_list);
 	  ptrdiff_t count1 = SPECPDL_INDEX ();
 
@@ -4430,6 +4433,10 @@ because (1) it preserves some marker positions and (2) it puts less data
 	  bset_undo_list (current_buffer, Qt);
 	  record_unwind_protect (restore_buffer, Fcurrent_buffer ());
 
+          /* Make the text read part of the buffer.  */
+          insert_from_gap_1 (inserted, inserted, false);
+          ingap = false;
+
 	  if (inserted > 0 && ! NILP (Vset_auto_coding_function))
 	    {
 	      coding_system = call2 (Vset_auto_coding_function,
@@ -4455,15 +4462,10 @@ because (1) it preserves some marker positions and (2) it puts less data
           adjust_overlays_for_delete (BEG, Z - BEG);
           set_buffer_intervals (current_buffer, NULL);
           TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
-
-          /* Change the buffer's multibyteness directly.  We used to do this
-             from within unbind_to, but it was unsafe since the bytes
-             may contain invalid sequences for a multibyte buffer (which is OK
-             here since we'll decode them before anyone else gets to see
-             them, but is dangerous when we're doing a non-local exit).  */
-          bset_enable_multibyte_characters (current_buffer, multibyte);
           bset_undo_list (current_buffer, undo_list);
           inserted = Z_BYTE - BEG_BYTE;
+          /* The bytes may be invalid for a multibyte buffer, so we can't
+             restore the multibyteness yet.  */
         }
 
       if (NILP (coding_system))
@@ -4471,7 +4473,7 @@ because (1) it preserves some marker positions and (2) it puts less data
       else
 	CHECK_CODING_SYSTEM (coding_system);
 
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
+      if (NILP (multibyte))
 	/* We must suppress all character code conversion except for
 	   end-of-line conversion.  */
 	coding_system = raw_text_coding_system (coding_system);
@@ -4490,33 +4492,51 @@ because (1) it preserves some marker positions and (2) it puts less data
 	{
 	  /* Visiting a file with these coding system makes the buffer
 	     unibyte.  */
-	  if (inserted > 0)
+          if (!ingap)
+            multibyte = Qnil;
+	  else if (inserted > 0)
 	    bset_enable_multibyte_characters (current_buffer, Qnil);
-	  else
+          else
 	    Fset_buffer_multibyte (Qnil);
 	}
     }
 
-  coding.dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
+  coding.dst_multibyte = !NILP (multibyte);
   if (CODING_MAY_REQUIRE_DECODING (&coding)
       && (inserted > 0 || CODING_REQUIRE_FLUSHING (&coding)))
     {
-      move_gap_both (PT, PT_BYTE);
-      GAP_SIZE += inserted;
-      ZV_BYTE -= inserted;
-      Z_BYTE -= inserted;
-      ZV -= inserted;
-      Z -= inserted;
+      if (ingap)
+        { /* Text is at beginning of gap, move it to the end.  */
+          memmove (GAP_END_ADDR - inserted, GPT_ADDR, inserted);
+        }
+      else
+        { /* Text is inside the buffer; move it to end of the gap.  */
+          move_gap_both (PT, PT_BYTE);
+	  eassert (inserted == Z_BYTE - BEG_BYTE);
+          GAP_SIZE += inserted;
+          ZV = Z = GPT = BEG;
+          ZV_BYTE = Z_BYTE = GPT_BYTE = BEG_BYTE;
+          /* Now we are safe to change the buffer's multibyteness directly.  */
+          bset_enable_multibyte_characters (current_buffer, multibyte);
+        }
+
       decode_coding_gap (&coding, inserted);
       inserted = coding.produced_char;
       coding_system = CODING_ID_NAME (coding.id);
     }
-  else if (inserted > 0)
+  else if (inserted > 0 && ingap)
     {
+      /* Make the text read part of the buffer.  */
+      eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
+      insert_from_gap_1 (inserted, inserted, false);
       invalidate_buffer_caches (current_buffer, PT, PT + inserted);
       adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted,
 			   inserted);
     }
+  else if (!ingap)
+    { /* Apparently, no decoding needed, so just set the multibyteness.  */
+      bset_enable_multibyte_characters (current_buffer, multibyte);
+    }
 
   /* Call after-change hooks for the inserted text, aside from the case
      of normal visiting (not with REPLACE), which is done in a new buffer






  parent reply	other threads:[~2019-07-03  4:21 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-29 11:17 bug#36431: Crash in marker.c:337 Werner LEMBERG
2019-06-29 12:13 ` Eli Zaretskii
2019-06-29 12:20   ` Eli Zaretskii
2019-06-29 22:56     ` Stefan Monnier
2019-06-30  7:26       ` Werner LEMBERG
2019-06-30 13:14         ` Stefan Monnier
2019-07-02 16:29           ` Stefan Monnier
2019-06-30 14:52         ` Eli Zaretskii
2019-06-30 14:39       ` Eli Zaretskii
2019-06-30 14:59         ` Stefan Monnier
2019-06-30 15:16           ` Eli Zaretskii
2019-06-30 15:53             ` Stefan Monnier
2019-07-02 17:04         ` Stefan Monnier
2019-07-02 17:22           ` Stefan Monnier
2019-07-02 17:37             ` Stefan Monnier
2019-07-02 17:42               ` Eli Zaretskii
2019-07-02 17:55                 ` Stefan Monnier
2019-07-02 17:39           ` Eli Zaretskii
2019-07-02 17:51             ` Stefan Monnier
2019-07-02 18:27               ` Eli Zaretskii
2019-07-02 19:44                 ` Stefan Monnier
2019-07-02 20:15                   ` Eli Zaretskii
2019-07-02 21:00                     ` Stefan Monnier
2019-07-03  4:49                       ` Eli Zaretskii
2019-07-03 16:19                         ` Stefan Monnier
2019-07-03 16:33                           ` Eli Zaretskii
2019-07-03  4:21         ` Stefan Monnier [this message]
2019-07-03  4:55           ` Eli Zaretskii
2019-07-03  6:20           ` Werner LEMBERG
2019-07-03  6:29             ` Eli Zaretskii
2019-07-03  6:46               ` Werner LEMBERG
2019-07-03  7:14                 ` Eli Zaretskii
2019-07-03 16:08             ` Stefan Monnier
2019-07-09 21:04             ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=jwv7e8zkau5.fsf-monnier+emacs@gnu.org \
    --to=monnier@iro.umontreal.ca \
    --cc=36431@debbugs.gnu.org \
    --cc=eliz@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).