unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#17168: 24.3.50; Segfault at mark_object
@ 2014-04-02  7:44 Nicolas Richard
  2014-04-02 15:53 ` Daniel Colascione
  2014-04-02 16:29 ` Dmitry Antipov
  0 siblings, 2 replies; 59+ messages in thread
From: Nicolas Richard @ 2014-04-02  7:44 UTC (permalink / raw)
  To: 17168

This happened while I was away.

Program received signal SIGSEGV, Segmentation fault.
mark_object (arg=194) at alloc.c:6127
6127		if (ptr->gcmarkbit)
#0  mark_object (arg=194) at alloc.c:6127
#1  0x081b2a14 in mark_vectorlike (ptr=0xb9b0db0) at alloc.c:5785
#2  0x081b30df in mark_object (arg=194710965) at alloc.c:6117
#3  0x081b3116 in mark_object (arg=188747058) at alloc.c:6131
#4  0x081b1434 in mark_maybe_pointer (p=0xb400d30) at alloc.c:4563
#5  0x081b1484 in mark_memory (start=0xbfffd08c, end=0xbfffebdc) at alloc.c:4638
#6  0x081b14d1 in mark_stack () at alloc.c:4872
#7  0x081b231c in Fgarbage_collect () at alloc.c:5545
#8  0x0814bbb6 in maybe_gc () at lisp.h:4518
#9  0x081cdd73 in Ffuncall (nargs=2, args=0xbfffd2d0) at eval.c:2766
#10 0x081cbb68 in internal_condition_case_n (bfun=0x81cdcbb <Ffuncall>, nargs=2, args=0xbfffd2d0, handlers=139298778, hfun=0x8073814 <safe_eval_handler>) at eval.c:1436
#11 0x0807392c in safe_call (nargs=2, func=141642493) at xdisp.c:2609
#12 0x08073969 in safe_call1 (fn=141642493, arg=139298754) at xdisp.c:2625
#13 0x080875a8 in prepare_menu_bars () at xdisp.c:11512
#14 0x0808ab42 in redisplay_internal () at xdisp.c:13403
#15 0x08089e98 in redisplay () at xdisp.c:13022
#16 0x08153013 in read_char (commandflag=1, map=218382190, prev_event=139298754, used_mouse_menu=0xbfffe8a3, end_time=0x0) at keyboard.c:2567
#17 0x0815d730 in read_key_sequence (keybuf=0xbfffe9c0, bufsize=30, prompt=139298754, dont_downcase_last=false, can_return_switch_frame=true, fix_current_buffer=true, prevent_redisplay=false)
    at keyboard.c:9079
#18 0x08151016 in command_loop_1 () at keyboard.c:1449
#19 0x081cb7e6 in internal_condition_case (bfun=0x8150cd3 <command_loop_1>, handlers=139331834, hfun=0x81506a9 <cmd_error>) at eval.c:1354
#20 0x08150a6f in command_loop_2 (ignore=139298754) at keyboard.c:1174
#21 0x081cb16e in internal_catch (tag=139329882, func=0x8150a4b <command_loop_2>, arg=139298754) at eval.c:1118
#22 0x08150a29 in command_loop () at keyboard.c:1153
#23 0x08150345 in recursive_edit_1 () at keyboard.c:777
#24 0x08150481 in Frecursive_edit () at keyboard.c:845
#25 0x0814e8b1 in main (argc=2, argv=0xbfffecd4) at emacs.c:1646

Lisp Backtrace:
"Automatic GC" (0x84c678c)
0x8714af8 PVEC_COMPILED
"redisplay_internal (C function)" (0x84c678c)
#0  mark_object (arg=194) at alloc.c:6127
        ptr = 0xc0
        ptrx = 0xbfffcea8
        obj = 192
        cdr_count = 0
#1  0x081b2a14 in mark_vectorlike (ptr=0xb9b0db0) at alloc.c:5785
        size = 36
        i = 14
#2  0x081b30df in mark_object (arg=194710965) at alloc.c:6117
        ptr = 0xb9b0db0
        pvectype = 0
        obj = 194710960
        cdr_count = 0
#3  0x081b3116 in mark_object (arg=188747058) at alloc.c:6131
        ptr = 0xb400d30
        ptrx = 0x12
        obj = 188747056
        cdr_count = 0
#4  0x081b1434 in mark_maybe_pointer (p=0xb400d30) at alloc.c:4563
        obj = 188747058
        m = 0xb709250
#5  0x081b1484 in mark_memory (start=0xbfffd08c, end=0xbfffebdc) at alloc.c:4638
        p = 0xb400d30
        pp = 0xbfffde90
        i = 0
#6  0x081b14d1 in mark_stack () at alloc.c:4872
        end = 0xbfffd08c
#7  0x081b231c in Fgarbage_collect () at alloc.c:5545
        nextb = 0x0
        stack_top_variable = 0 '\000'
        i = 1617
        message_p = true
        count = 7
        start = {
          tv_sec = 1396375514, 
          tv_nsec = 188980653
        }
        retval = 139298754
        tot_before = 0
#8  0x0814bbb6 in maybe_gc () at lisp.h:4518
No locals.
#9  0x081cdd73 in Ffuncall (nargs=2, args=0xbfffd2d0) at eval.c:2766
        fun = 139319504
        original_fun = 139319297
        funcar = 0
        numargs = 1
        lisp_numargs = -1073753480
        val = 138917188
        internal_args = 0x0
        i = 5
#10 0x081cbb68 in internal_condition_case_n (bfun=0x81cdcbb <Ffuncall>, nargs=2, args=0xbfffd2d0, handlers=139298778, hfun=0x8073814 <safe_eval_handler>) at eval.c:1436
        val = 139352440
        c = 0x84e6500
#11 0x0807392c in safe_call (nargs=2, func=141642493) at xdisp.c:2609
        i = 2
        count = 5
        gcpro1 = {
          next = 0x8714afd, 
          var = 0xffd340, 
          nvars = 2
        }
        ap = 0xbfffd33c "H\323\377\277\005\262\024\bH\230U\b\310\323\377\277\250u\b\b\375Jq\b\302\207M\b\320\330M\b"
        args = 0xbfffd2d0
        val = 136011655
#12 0x08073969 in safe_call1 (fn=141642493, arg=139298754) at xdisp.c:2625
No locals.
#13 0x080875a8 in prepare_menu_bars () at xdisp.c:11512
        windows = 139298754
        all_windows = false
        some_windows = true
        gcpro1 = {
          next = 0xa0e9c28, 
          var = 0x1, 
          nvars = 1
        }
        gcpro2 = {
          next = 0x0, 
          var = 0xbfffd378, 
          nvars = 134576184
        }
        tooltip_frame = 139298754
#14 0x0808ab42 in redisplay_internal () at xdisp.c:13403
        w = 0xb75b720
        sw = 0xb75b720
        fr = 0x84e46d0
        pending = 0
        must_finish = false
        match_p = false
        tlbufpos = {
          charpos = 136133708, 
          bytepos = 139298754
        }
        tlendpos = {
          charpos = 141207980, 
          bytepos = -1073748696
        }
        number_of_visible_frames = 1
        count = 2
        sf = 0x84e46d0
        polling_stopped_here = 0
        tail = 139298754
        frame = 139347669
        consider_all_windows_p = 8
        update_miniwindow_p = false
#15 0x08089e98 in redisplay () at xdisp.c:13022
No locals.
#16 0x08153013 in read_char (commandflag=1, map=218382190, prev_event=139298754, used_mouse_menu=0xbfffe8a3, end_time=0x0) at keyboard.c:2567
        echo_current = false
        c = 139298754
        jmpcount = -1073748040
        local_getcjmp = {{
            __jmpbuf = {-1073748232, 136044005, 139319504, 0, -1073748264, 136011529}, 
            __mask_was_saved = 141081056, 
            __saved_mask = {
              __val = {141081056, 3221219032, 136133786, 139298754, 139319504, 141081056, 141431720, 139360971, 0, 3221219128, 135679500, 141431722, 139298778, 3221218816, 4294967295, 192, 0, 
                139298754, 3221219080, 135571235, 218382182, 3221219128, 135982083, 218382182, 218382174, 141431722, 141503502, 139298754, 139298754, 2, 139298754, 222500864}
            }
          }}
        save_jump = {{
            __jmpbuf = {56, 139360971, 162623043, 169126963, 177160939, 206411387}, 
            __mask_was_saved = 216942387, 
            __saved_mask = {
              __val = {3067047979, 3068297204, 3068298304, 0, 28, 3067056153, 198911504, 178059944, 28, 4, 211656168, 3221214072, 3221218920, 135574114, 139319504, 6, 3067056073, 0, 0, 
                139319504, 3221218952, 135574114, 139319504, 6, 3221218984, 136011027, 139319509, 139319504, 3221218968, 135574303, 139319509, 139321778}
            }
          }}
        tem = 139298754
        save = 142664326
        previous_echo_area_message = 139298754
        also_record = 139298754
        reread = false
        gcpro1 = {
          next = 0x84de1b2, 
          var = 0xbfffe6f8, 
          nvars = 136044861
        }
        gcpro2 = {
          next = 0x2fc, 
          var = 0x1000006, 
          nvars = 0
        }
        polling_stopped_here = false
        orig_kboard = 0x889e340
#17 0x0815d730 in read_key_sequence (keybuf=0xbfffe9c0, bufsize=30, prompt=139298754, dont_downcase_last=false, can_return_switch_frame=true, fix_current_buffer=true, prevent_redisplay=false)
    at keyboard.c:9079
        interrupted_kboard = 0x889e340
        interrupted_frame = 0x84e46d0
        key = 139319509
        used_mouse_menu = false
        echo_local_start = 0
        last_real_key_start = 0
        keys_local_start = 0
        new_binding = 139347669
        count = 2
        t = 0
        echo_start = 0
        keys_start = 0
        current_binding = 218382190
        first_event = 139298754
        first_unbound = 31
        mock_input = 0
        fkey = {
          parent = 140966150, 
          map = 140966150, 
          start = 0, 
          end = 0
        }
        keytran = {
          parent = 139286286, 
          map = 139286286, 
          start = 0, 
          end = 0
        }
        indec = {
          parent = 140966158, 
          map = 140966158, 
          start = 0, 
          end = 0
        }
        shift_translated = false
        delayed_switch_frame = 139298754
        original_uppercase = 134622171
        original_uppercase_position = -1
        dummyflag = false
        starting_buffer = 0x84dd8d0
        fake_prefixed_keys = 139298754
        gcpro1 = {
          next = 0x84d87c2, 
          var = 0xbfffe8c8, 
          nvars = 136011655
        }
#18 0x08151016 in command_loop_1 () at keyboard.c:1449
        cmd = 141463658
        keybuf = {96, 12, 137180145, 139298754, 139370802, 139298754, 4, 139298754, 141445706, 0, -1073747448, 135596160, 139329858, 210791238, 137180145, 139298754, 197184288, 0, -1073747352, 
          135595989, 210791238, -1073747409, -1073747384, 136104088, 2, 193733073, -1227911223, 0, 1919251558, 1797271584}
        i = 2
        prev_modiff = 11
        prev_buffer = 0x84dd8d0
        already_adjusted = false
#19 0x081cb7e6 in internal_condition_case (bfun=0x8150cd3 <command_loop_1>, handlers=139331834, hfun=0x81506a9 <cmd_error>) at eval.c:1354
        val = 193733073
        c = 0x84e6428
#20 0x08150a6f in command_loop_2 (ignore=139298754) at keyboard.c:1174
        val = 0
#21 0x081cb16e in internal_catch (tag=139329882, func=0x8150a4b <command_loop_2>, arg=139298754) at eval.c:1118
        val = 139298754
        c = 0x88a0570
#22 0x08150a29 in command_loop () at keyboard.c:1153
No locals.
#23 0x08150345 in recursive_edit_1 () at keyboard.c:777
        count = 1
        val = -1073747128
#24 0x08150481 in Frecursive_edit () at keyboard.c:845
        count = 0
        buffer = 139298754
#25 0x0814e8b1 in main (argc=2, argv=0xbfffecd4) at emacs.c:1646
        dummy = 2
        stack_bottom_variable = 0 '\000'
        do_initial_setlocale = true
        dumping = false
        skip_args = 1
        rlim = {
          rlim_cur = 8388608, 
          rlim_max = 18446744073709551615
        }
        no_loadup = false
        junk = 0x0
        dname_arg = 0x0
        ch_to_dir = 0x0
        original_pwd = 0x0

Lisp Backtrace:
"Automatic GC" (0x84c678c)
0x8714af8 PVEC_COMPILED
"redisplay_internal (C function)" (0x84c678c)
#0  mark_object (arg=194) at alloc.c:6127
#1  0x081b2a14 in mark_vectorlike (ptr=0xb9b0db0) at alloc.c:5785
#2  0x081b30df in mark_object (arg=194710965) at alloc.c:6117
#3  0x081b3116 in mark_object (arg=188747058) at alloc.c:6131
#4  0x081b1434 in mark_maybe_pointer (p=0xb400d30) at alloc.c:4563
#5  0x081b1484 in mark_memory (start=0xbfffd08c, end=0xbfffebdc) at alloc.c:4638
#6  0x081b14d1 in mark_stack () at alloc.c:4872
#7  0x081b231c in Fgarbage_collect () at alloc.c:5545
#8  0x0814bbb6 in maybe_gc () at lisp.h:4518
#9  0x081cdd73 in Ffuncall (nargs=2, args=0xbfffd2d0) at eval.c:2766
#10 0x081cbb68 in internal_condition_case_n (bfun=0x81cdcbb <Ffuncall>, nargs=2, args=0xbfffd2d0, handlers=139298778, hfun=0x8073814 <safe_eval_handler>) at eval.c:1436
#11 0x0807392c in safe_call (nargs=2, func=141642493) at xdisp.c:2609
#12 0x08073969 in safe_call1 (fn=141642493, arg=139298754) at xdisp.c:2625
#13 0x080875a8 in prepare_menu_bars () at xdisp.c:11512
#14 0x0808ab42 in redisplay_internal () at xdisp.c:13403
#15 0x08089e98 in redisplay () at xdisp.c:13022
#16 0x08153013 in read_char (commandflag=1, map=218382190, prev_event=139298754, used_mouse_menu=0xbfffe8a3, end_time=0x0) at keyboard.c:2567
#17 0x0815d730 in read_key_sequence (keybuf=0xbfffe9c0, bufsize=30, prompt=139298754, dont_downcase_last=false, can_return_switch_frame=true, fix_current_buffer=true, prevent_redisplay=false)
    at keyboard.c:9079
#18 0x08151016 in command_loop_1 () at keyboard.c:1449
#19 0x081cb7e6 in internal_condition_case (bfun=0x8150cd3 <command_loop_1>, handlers=139331834, hfun=0x81506a9 <cmd_error>) at eval.c:1354
#20 0x08150a6f in command_loop_2 (ignore=139298754) at keyboard.c:1174
#21 0x081cb16e in internal_catch (tag=139329882, func=0x8150a4b <command_loop_2>, arg=139298754) at eval.c:1118
#22 0x08150a29 in command_loop () at keyboard.c:1153
#23 0x08150345 in recursive_edit_1 () at keyboard.c:777
#24 0x08150481 in Frecursive_edit () at keyboard.c:845
#25 0x0814e8b1 in main (argc=2, argv=0xbfffecd4) at emacs.c:1646

Lisp Backtrace:
"Automatic GC" (0x84c678c)
0x8714af8 PVEC_COMPILED
"redisplay_internal (C function)" (0x84c678c)




In GNU Emacs 24.3.50.7 (i686-pc-linux-gnu, X toolkit, Xaw3d scroll bars)
 of 2014-03-27 on geodiff-mac3
Windowing system distributor `The X.Org Foundation', version 11.0.11304000
System Description:	Gentoo Base System release 2.2

Configured using:
 `configure --with-x-toolkit=lucid 'CFLAGS= -O0 -g3''

Important settings:
  value of $LANG: fr_FR.UTF-8
  locale-coding-system: utf-8-unix


-- 
Nico.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02  7:44 bug#17168: 24.3.50; Segfault at mark_object Nicolas Richard
@ 2014-04-02 15:53 ` Daniel Colascione
  2014-04-02 17:59   ` Nicolas Richard
  2014-04-02 16:29 ` Dmitry Antipov
  1 sibling, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-02 15:53 UTC (permalink / raw)
  To: Nicolas Richard, 17168

[-- Attachment #1: Type: text/plain, Size: 371 bytes --]

On 04/02/2014 12:44 AM, Nicolas Richard wrote:
> This happened while I was away.

i686? Linux? mark_vectorlike? Good. This bug sounds like a manifestation
of the GC bug we've been hunting for a while. Would you be comfortable
sharing a core dump next time (perhaps privately)? If not, please
collect a core dump anyway so that we can ask you questions about it.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02  7:44 bug#17168: 24.3.50; Segfault at mark_object Nicolas Richard
  2014-04-02 15:53 ` Daniel Colascione
@ 2014-04-02 16:29 ` Dmitry Antipov
  2014-04-02 19:46   ` Daniel Colascione
  1 sibling, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-02 16:29 UTC (permalink / raw)
  To: Nicolas Richard; +Cc: 17168

On 04/02/2014 11:44 AM, Nicolas Richard wrote:

> This happened while I was away.

IIUC this is pretty similar to GC crashes observed by RMS (http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15688).
What gcc did you use to compile this binary?

Dmitry






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 15:53 ` Daniel Colascione
@ 2014-04-02 17:59   ` Nicolas Richard
  0 siblings, 0 replies; 59+ messages in thread
From: Nicolas Richard @ 2014-04-02 17:59 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Nicolas Richard, 17168

Daniel Colascione <dancol@dancol.org> writes:
> On 04/02/2014 12:44 AM, Nicolas Richard wrote:
>> This happened while I was away.
>
> i686? Linux? mark_vectorlike? Good. This bug sounds like a manifestation
> of the GC bug we've been hunting for a while. Would you be comfortable
> sharing a core dump next time (perhaps privately)? If not, please
> collect a core dump anyway so that we can ask you questions about it.

For the record, I sent the core dump privately. I also kept the gdb
session open.

-- 
Nico.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 16:29 ` Dmitry Antipov
@ 2014-04-02 19:46   ` Daniel Colascione
  2014-04-02 20:33     ` Daniel Colascione
                       ` (2 more replies)
  0 siblings, 3 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-02 19:46 UTC (permalink / raw)
  To: Dmitry Antipov, Nicolas Richard; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 553 bytes --]

On 04/02/2014 09:29 AM, Dmitry Antipov wrote:
> On 04/02/2014 11:44 AM, Nicolas Richard wrote:
> 
>> This happened while I was away.
> 
> IIUC this is pretty similar to GC crashes observed by RMS
> (http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15688).
> What gcc did you use to compile this binary?

It's exactly the same crash. We're trying to mark clear-transient-map.
Nicolas, what is the bzr revision from which this Emacs was compiled?
(Is there a way to get this information from a core other than just
walking the symbol obarray?)


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 19:46   ` Daniel Colascione
@ 2014-04-02 20:33     ` Daniel Colascione
  2014-04-02 20:57       ` Nicolas Richard
  2014-04-02 20:37     ` Eli Zaretskii
  2014-04-02 20:49     ` Nicolas Richard
  2 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-02 20:33 UTC (permalink / raw)
  To: Dmitry Antipov, Nicolas Richard; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 700 bytes --]

On 04/02/2014 12:46 PM, Daniel Colascione wrote:
> On 04/02/2014 09:29 AM, Dmitry Antipov wrote:
>> On 04/02/2014 11:44 AM, Nicolas Richard wrote:
>>
>>> This happened while I was away.
>>
>> IIUC this is pretty similar to GC crashes observed by RMS
>> (http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15688).
>> What gcc did you use to compile this binary?
> 
> It's exactly the same crash. We're trying to mark clear-transient-map.
> Nicolas, what is the bzr revision from which this Emacs was compiled?
> (Is there a way to get this information from a core other than just
> walking the symbol obarray?)

Also, Nicolas, can you call mem_find on 194710965, 188747058, and 194710960?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 19:46   ` Daniel Colascione
  2014-04-02 20:33     ` Daniel Colascione
@ 2014-04-02 20:37     ` Eli Zaretskii
  2014-04-02 20:40       ` Daniel Colascione
  2014-04-02 20:49     ` Nicolas Richard
  2 siblings, 1 reply; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-02 20:37 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: theonewiththeevillook, dmantipov, 17168

> Date: Wed, 02 Apr 2014 12:46:13 -0700
> From: Daniel Colascione <dancol@dancol.org>
> Cc: 17168@debbugs.gnu.org
> 
> Nicolas, what is the bzr revision from which this Emacs was compiled?
> (Is there a way to get this information from a core other than just
> walking the symbol obarray?)

Like this:

  (gdb) p Fsymbol_value(intern("emacs-bzr-version"))
  $5 = 57020481
  (gdb) xtype
  Lisp_String
  (gdb) xstring
  $6 = (struct Lisp_String *) 0x3661040
  "116894 rudalics@gmx.at-20140402143333-a56l2vy9oak0njsg"

Alas, you cannot do this when debugging a core file, you need a
running program.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 20:37     ` Eli Zaretskii
@ 2014-04-02 20:40       ` Daniel Colascione
  2014-04-02 20:55         ` Eli Zaretskii
  2014-04-03  6:59         ` Dmitry Antipov
  0 siblings, 2 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-02 20:40 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: theonewiththeevillook, dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 834 bytes --]

On 04/02/2014 01:37 PM, Eli Zaretskii wrote:
>> Date: Wed, 02 Apr 2014 12:46:13 -0700
>> From: Daniel Colascione <dancol@dancol.org>
>> Cc: 17168@debbugs.gnu.org
>>
>> Nicolas, what is the bzr revision from which this Emacs was compiled?
>> (Is there a way to get this information from a core other than just
>> walking the symbol obarray?)
> 
> Like this:
> 
>   (gdb) p Fsymbol_value(intern("emacs-bzr-version"))
>   $5 = 57020481
>   (gdb) xtype
>   Lisp_String
>   (gdb) xstring
>   $6 = (struct Lisp_String *) 0x3661040
>   "116894 rudalics@gmx.at-20140402143333-a56l2vy9oak0njsg"
> 
> Alas, you cannot do this when debugging a core file, you need a
> running program.

I was afraid of that. Would you object to declaring
emacs-repository-version in C so that we can find it more easily in core
dumps?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 19:46   ` Daniel Colascione
  2014-04-02 20:33     ` Daniel Colascione
  2014-04-02 20:37     ` Eli Zaretskii
@ 2014-04-02 20:49     ` Nicolas Richard
  2 siblings, 0 replies; 59+ messages in thread
From: Nicolas Richard @ 2014-04-02 20:49 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Nicolas Richard, Dmitry Antipov, 17168

Daniel Colascione <dancol@dancol.org> writes:
> On 04/02/2014 09:29 AM, Dmitry Antipov wrote:
>> IIUC this is pretty similar to GC crashes observed by RMS
>> (http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15688).
>> What gcc did you use to compile this binary?
>
> It's exactly the same crash. We're trying to mark clear-transient-map.
> Nicolas, what is the bzr revision from which this Emacs was compiled?

I don't use the bzr repo, but the git repo, so emacs-bzr-version will be
nil. Unless I updated my git repo (but I don't think I did), it was git
commit 5f7fb09:
Author: YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
Date:   Thu Mar 27 18:25:17 2014 +0200

-- 
Nico.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 20:40       ` Daniel Colascione
@ 2014-04-02 20:55         ` Eli Zaretskii
  2014-04-03  6:59         ` Dmitry Antipov
  1 sibling, 0 replies; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-02 20:55 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: theonewiththeevillook, dmantipov, 17168

> Date: Wed, 02 Apr 2014 13:40:06 -0700
> From: Daniel Colascione <dancol@dancol.org>
> CC: dmantipov@yandex.ru, theonewiththeevillook@yahoo.fr, 
>  17168@debbugs.gnu.org
> 
> >   (gdb) p Fsymbol_value(intern("emacs-bzr-version"))
> >   $5 = 57020481
> >   (gdb) xtype
> >   Lisp_String
> >   (gdb) xstring
> >   $6 = (struct Lisp_String *) 0x3661040
> >   "116894 rudalics@gmx.at-20140402143333-a56l2vy9oak0njsg"
> > 
> > Alas, you cannot do this when debugging a core file, you need a
> > running program.
> 
> I was afraid of that. Would you object to declaring
> emacs-repository-version in C so that we can find it more easily in core
> dumps?

I don't see anything wrong with that.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 20:33     ` Daniel Colascione
@ 2014-04-02 20:57       ` Nicolas Richard
  2014-04-02 21:50         ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Nicolas Richard @ 2014-04-02 20:57 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Nicolas Richard, Dmitry Antipov, 17168

Daniel Colascione <dancol@dancol.org> writes:
> Also, Nicolas, can you call mem_find on 194710965, 188747058, and
> 194710960?

I must warn you that I'm totally ignorant of many things, including C
and gdb. Here's my attempt:

(gdb) mem_find(194710965)
Undefined command: "mem_find".  Try "help".
(gdb) p mem_find(194710965)
$1 = (struct mem_node *) 0xb9b1d50
(gdb) p mem_find(188747058)
$2 = (struct mem_node *) 0xb709250
(gdb) p mem_find(194710960)
$3 = (struct mem_node *) 0xb9b1d50

I guess that this information is of little value by itself, but I don't
want to mess up things while trying to get more information.

-- 
Nico.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 20:57       ` Nicolas Richard
@ 2014-04-02 21:50         ` Daniel Colascione
  2014-04-02 23:24           ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-02 21:50 UTC (permalink / raw)
  To: Nicolas Richard; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 2640 bytes --]

On 04/02/2014 01:57 PM, Nicolas Richard wrote:
> Daniel Colascione <dancol@dancol.org> writes:
>> Also, Nicolas, can you call mem_find on 194710965, 188747058, and
>> 194710960?
> 
> I must warn you that I'm totally ignorant of many things, including C
> and gdb. Here's my attempt:
> 
> (gdb) mem_find(194710965)
> Undefined command: "mem_find".  Try "help".
> (gdb) p mem_find(194710965)
> $1 = (struct mem_node *) 0xb9b1d50
> (gdb) p mem_find(188747058)
> $2 = (struct mem_node *) 0xb709250
> (gdb) p mem_find(194710960)
> $3 = (struct mem_node *) 0xb9b1d50
> 
> I guess that this information is of little value by itself, but I don't
> want to mess up things while trying to get more information.

Thanks. I looked at the dump and checked that what we already know is
correct.

The vector we're trying to mark is in mem_node 0xb9b1d50:

(gdb) set $m = (struct mem_node *) 0xb9b1d50
(gdb) print *$m
$116 = {
  left = 0x84b6c20 <mem_z>,
  right = 0x84b6c20 <mem_z>,
  parent = 0x93b2f08,
  start = 0xb9b0d50,
  end = 0xb9b1d48,
  color = MEM_RED,
  type = MEM_TYPE_VECTOR_BLOCK

The contents of the block begin here:

(gdb) set $block = (struct vector_block*) ($m->start)
(gdb) print $block
$122 = (struct vector_block *) 0xb9b0d50
(gdb) set $vector = (struct Lisp_Vector*) $block->data
(gdb) print *$block
$123 = {
  data =
"\023\000\000\200\342\210M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\b\201\331p\nJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\bJ\334M\b\006\000\000\314\302\207M\b\201\272\271\fmɏ\v$\000\000\200&\243\060\f\302\207M\b\"_m\b\000\020\000Ax\366\267\f\b\200\000\311]S@\f͞\234\f\325\330M\b\302\207M\b\302\207M\b͞\234\f\302\207M\b\025\016\233\v\302\000\000\000+",
'\000' <repeats 11 times>, "\302\000\000\000+", '\000' <repeats 11
times>, "\061\064:0\002\000\000\200%\016\233\v\245\016\233\vven "...,
  next = 0xb9cc2a8

The vector we're trying to mark is 96 bytes inside this block:

(gdb) print (char*)ptr - (char*)$vector
$135 = 96

The first vector in the block is a regular vector with 0x13 elements:

(gdb) print/x $vector->header.size
$156 = 0x80000013

It's 80 bytes long:

(gdb) print header_size + (($vector->header.size &~ ARRAY_MARK_FLAG) *
word_size)
$148 = 80

The next vector in the block is a PVEC_COMPILED:

(gdb) print/x $vector->header.size
$159 = 0xcc000006

It's 32 bytes long, which means that we're trying to mark a pointer into
the middle of the vector.

The clear-transient-map symbol itself, of course, is live. It's
perfectly normal and its value slot is set to Qunbound.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 21:50         ` Daniel Colascione
@ 2014-04-02 23:24           ` Stefan Monnier
  2014-04-03  0:28             ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-02 23:24 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Nicolas Richard, Dmitry Antipov, 17168

> It's 32 bytes long, which means that we're trying to mark a pointer into
> the middle of the vector.
> The clear-transient-map symbol itself, of course, is live. It's
> perfectly normal and its value slot is set to Qunbound.

So, IIUC the symbol-function slot of the clear-transient-map symbol
points into the middle of a vector?

Since the symbol-function slot of the clear-transient-map symbol is only
set once, I think this means that the vector to which it pointed has
been somehow freed.

Of course that shouldn't be possible: at any previous GC, either the
clear-transient-map symbol was found live and traced (so the vector to
which it pointed shouldn't have been freed) or it wasn't found live, in
which case the symbol-function slot should have been set to the special
"dead" value.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 23:24           ` Stefan Monnier
@ 2014-04-03  0:28             ` Daniel Colascione
  0 siblings, 0 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03  0:28 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Nicolas Richard, Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1333 bytes --]

On 04/02/2014 04:24 PM, Stefan Monnier wrote:
>> It's 32 bytes long, which means that we're trying to mark a pointer into
>> the middle of the vector.
>> The clear-transient-map symbol itself, of course, is live. It's
>> perfectly normal and its value slot is set to Qunbound.
> 
> So, IIUC the symbol-function slot of the clear-transient-map symbol
> points in the middle of a vector?

That's what my analysis seems to indicate.

> Since the symbol-function slot of the clear-transient-map symbol is only
> set once, I think this means that the vector to which it pointed has
> been somehow freed.

That's what I speculated last week, but I still have no idea how it
would be possible.

> Of course that shouldn't be possible: at any previous GC, either the
> clear-transient-map symbol was found live and traced (so the vector to
> which it pointed shouldn't have been freed) or it wasn't found live, in
> which case the symbol-function slot should have been set to the special
> "dead" value.

I added some code to trunk that might help track down the problem. Now
we can mark certain objects as "suspicious" (only vectors for now, but
that's sufficient); when we free one of these suspicious objects, we
record a stack trace. This way, if we crash later, we can figure out
where things went wrong.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-02 20:40       ` Daniel Colascione
  2014-04-02 20:55         ` Eli Zaretskii
@ 2014-04-03  6:59         ` Dmitry Antipov
  2014-04-03  7:04           ` Dmitry Antipov
  1 sibling, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-03  6:59 UTC (permalink / raw)
  To: 17168; +Cc: theonewiththeevillook

[-- Attachment #1: Type: text/plain, Size: 1271 bytes --]

Hopefully I found a way to catch a bogus object in the 'function' slot
of a Lisp_Symbol.  It is 100% reproducible for me, as of bzr revision 116934.

1. Apply this patch.

2. Compile with -O0 -g3 and --enable-checking.

3. Run 'emacs -Q', then M-x byte-force-recompile
    /path/to/trunk/lis/org

4. Crash ==>

#0  0x000000379220f62b in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/pt-raise.c:37
#1  0x0000000000569aff in terminate_due_to_signal (sig=6, backtrace_limit=2147483647) at ../../trunk/src/emacs.c:382
#2  0x00000000005f089a in die (
     msg=0x70f498 "SYMBOLP (sym->s.function) || CONSP (sym->s.function) || COMPILEDP (sym->s.function) || SUBRP (sym->s.function)",
     file=0x70e420 "../../trunk/src/alloc.c", line=6613) at ../../trunk/src/alloc.c:6913
#3  0x00000000005f00b5 in sweep_symbols () at ../../trunk/src/alloc.c:6610
#4  0x00000000005f03bb in gc_sweep () at ../../trunk/src/alloc.c:6735
#5  0x00000000005ede1e in Fgarbage_collect () at ../../trunk/src/alloc.c:5632
#6  0x0000000000567706 in maybe_gc () at ../../trunk/src/lisp.h:4520
#7  0x000000000065b95f in exec_byte_code (bytestr=..., vector=..., maxdepth=..., args_template=..., nargs=2, args=0x7fff66de7f70)
     at ../../trunk/src/bytecode.c:954

[...next frames probably irrelevant...]

Dmitry


[-- Attachment #2: bug17168_bogus_function_eassert.patch --]
[-- Type: text/x-patch, Size: 918 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2014-04-03 00:37:51 +0000
+++ src/alloc.c	2014-04-03 06:42:53 +0000
@@ -6174,6 +6174,11 @@
 	  break;
 	CHECK_ALLOCATED_AND_LIVE (live_symbol_p);
 	ptr->gcmarkbit = 1;
+	/* Attempt to catch bogus objects.  */
+	eassert (SYMBOLP (ptr->function)
+		 || CONSP (ptr->function)
+		 || COMPILEDP (ptr->function)
+		 || SUBRP (ptr->function));
 	mark_object (ptr->function);
 	mark_object (ptr->plist);
 	switch (ptr->redirect)
@@ -6601,6 +6606,11 @@
               if (!pure_p)
                 eassert (!STRING_MARKED_P (XSTRING (sym->s.name)));
               sym->s.gcmarkbit = 0;
+              /* Attempt to catch bogus objects.  */
+              eassert (SYMBOLP (sym->s.function)
+                       || CONSP (sym->s.function)
+                       || COMPILEDP (sym->s.function)
+                       || SUBRP (sym->s.function));
             }
         }
 


^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03  6:59         ` Dmitry Antipov
@ 2014-04-03  7:04           ` Dmitry Antipov
  2014-04-03  7:55             ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-03  7:04 UTC (permalink / raw)
  To: 17168

On 04/03/2014 10:59 AM, Dmitry Antipov wrote:

> 3. Run 'emacs -Q', then M-x byte-force-recompile
>     /path/to/trunk/lis/org
                      ^^^^^^^
Mean /path/to/trunk/lisp/org, i.e. all Org mode.

Dmitry






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03  7:04           ` Dmitry Antipov
@ 2014-04-03  7:55             ` Daniel Colascione
  2014-04-03  9:08               ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03  7:55 UTC (permalink / raw)
  To: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 9396 bytes --]

On 04/03/2014 12:04 AM, Dmitry Antipov wrote:
> On 04/03/2014 10:59 AM, Dmitry Antipov wrote:
> 
>> 3. Run 'emacs -Q', then M-x byte-force-recompile
>>     /path/to/trunk/lis/org
>                      ^^^^^^^
> Mean /path/to/trunk/lisp/org, i.e. all Org mode.

Nice work. What gave you the idea of using byte-force-recompile to
repro? I'd tried a few other stress cases myself and couldn't find
anything. Your repro works perfectly.

In eval-after-load, we have code that looks like this:

(fset fun (lambda (file)
            (when (equal file lfn)
              (remove-hook 'after-load-functions fun)
              (funcall func))))

This code looks just like the subr.el code that was causing problems for
Richard. I changed eval-after-load locally to something like this and
re-ran:

(fset fun
      (suspicious-object (lambda (file)
                           (when (equal file lfn)
                             (remove-hook 'after-load-functions fun)
                             (funcall func)))))

When your assertion hits, the vector we're trying to free mark is dead
and seems to have garbage in the function slot. It's already been freed.

Below is the spot where we're freeing that lambda. If we don't set an
alloc.c breakpoint and let the code continue to assertion failure, then
the stack in suspicious_free_history is exactly what's below.

Breakpoint 4, detect_suspicious_free (ptr=0x1c363f8) at alloc.c:6868
6868	        rec =
&suspicious_free_history[suspicious_free_history_index++];
(gdb) wher
#0  detect_suspicious_free (ptr=0x1c363f8) at alloc.c:6868
#1  0x000000000056779e in cleanup_vector (vector=0x1c363f8) at alloc.c:2959
#2  0x0000000000567962 in sweep_vectors () at alloc.c:3017
#3  0x000000000056dd69 in gc_sweep () at alloc.c:6738
#4  0x000000000056b893 in Fgarbage_collect () at alloc.c:5632
#5  0x00000000004e4c95 in maybe_gc () at lisp.h:4520
#6  0x00000000005d96f4 in exec_byte_code (bytestr=13432081,
vector=29098381,
    maxdepth=16, args_template=0, nargs=0, args=0x7fffffff9410)
    at bytecode.c:753
#7  0x0000000000590e39 in funcall_lambda (fun=29327469, nargs=0,
    arg_vector=0x7fffffff9410) at eval.c:2983
#8  0x0000000000590828 in Ffuncall (nargs=1, args=0x7fffffff9408)
    at eval.c:2864
#9  0x00000000005d9ecb in exec_byte_code (bytestr=13433121,
vector=29327533,
    maxdepth=4, args_template=0, nargs=0, args=0x7fffffff9928)
    at bytecode.c:919
#10 0x0000000000590e39 in funcall_lambda (fun=29098549, nargs=0,
    arg_vector=0x7fffffff9928) at eval.c:2983
#11 0x0000000000590828 in Ffuncall (nargs=1, args=0x7fffffff9920)
    at eval.c:2864
#12 0x000000000058ee87 in eval_sub (form=29022502) at eval.c:2157
#13 0x000000000058cb9a in internal_lisp_condition_case (var=13413026,
    bodyform=29022502, handlers=29022406) at eval.c:1323
#14 0x00000000005db0ac in exec_byte_code (bytestr=13431537,
vector=18344837,
    maxdepth=64, args_template=1028, nargs=1, args=0x7fffffffa0f8)
    at bytecode.c:1169
#15 0x0000000000590e39 in funcall_lambda (fun=18345373, nargs=1,
    arg_vector=0x7fffffffa0f0) at eval.c:2983
#16 0x0000000000590828 in Ffuncall (nargs=2, args=0x7fffffffa0e8)
    at eval.c:2864
#17 0x00000000005d9ecb in exec_byte_code (bytestr=13425857,
vector=18336269,
    maxdepth=68, args_template=2052, nargs=2, args=0x7fffffffa680)
    at bytecode.c:919
#18 0x0000000000590e39 in funcall_lambda (fun=18337101, nargs=2,
    arg_vector=0x7fffffffa670) at eval.c:2983
#19 0x0000000000590828 in Ffuncall (nargs=3, args=0x7fffffffa668)
    at eval.c:2864
#20 0x00000000005d9ecb in exec_byte_code (bytestr=13424785,
vector=17895757,
    maxdepth=40, args_template=4100, nargs=3, args=0x7fffffffabd0)
    at bytecode.c:919
#21 0x0000000000590e39 in funcall_lambda (fun=17895885, nargs=3,
    arg_vector=0x7fffffffabb8) at eval.c:2983
#22 0x0000000000590828 in Ffuncall (nargs=4, args=0x7fffffffabb0)
    at eval.c:2864
#23 0x00000000005d9ecb in exec_byte_code (bytestr=13414625,
vector=17008333,
    maxdepth=28, args_template=0, nargs=0, args=0x7fffffffb0e0)
    at bytecode.c:919
#24 0x0000000000590e39 in funcall_lambda (fun=18006989, nargs=0,
    arg_vector=0x7fffffffb0e0) at eval.c:2983
#25 0x0000000000590828 in Ffuncall (nargs=1, args=0x7fffffffb0d8)
    at eval.c:2864
#26 0x00000000005d9ecb in exec_byte_code (bytestr=13424001,
vector=18250445,
    maxdepth=4, args_template=0, nargs=0, args=0x7fffffffb5f8)
    at bytecode.c:919
#27 0x0000000000590e39 in funcall_lambda (fun=18327557, nargs=0,
    arg_vector=0x7fffffffb5f8) at eval.c:2983
#28 0x0000000000590828 in Ffuncall (nargs=1, args=0x7fffffffb5f0)
    at eval.c:2864
#29 0x000000000058ee87 in eval_sub (form=13218422) at eval.c:2157
#30 0x000000000058cb9a in internal_lisp_condition_case (var=13412402,
    bodyform=13218422, handlers=13218310) at eval.c:1323
#31 0x00000000005db0ac in exec_byte_code (bytestr=13414065,
vector=17986453,
    maxdepth=104, args_template=3076, nargs=3, args=0x7fffffffbdf0)
    at bytecode.c:1169
#32 0x0000000000590e39 in funcall_lambda (fun=17986813, nargs=3,
    arg_vector=0x7fffffffbdd8) at eval.c:2983
#33 0x0000000000590828 in Ffuncall (nargs=4, args=0x7fffffffbdd0)
    at eval.c:2864
#34 0x00000000005d9ecb in exec_byte_code (bytestr=13413841,
vector=17986061,
    maxdepth=20, args_template=1028, nargs=1, args=0x7fffffffc278)
    at bytecode.c:919
#35 0x0000000000590e39 in funcall_lambda (fun=17986093, nargs=1,
    arg_vector=0x7fffffffc270) at eval.c:2983
#36 0x0000000000590b45 in apply_lambda (fun=17986093, args=17246278)
    at eval.c:2924
#37 0x000000000058f191 in eval_sub (form=17246438) at eval.c:2230
#38 0x00000000005c0a79 in readevalloop (readcharfun=17665045, stream=0x0,
    sourcename=13097937, printflag=false, unibyte=12966770,
readfun=12966770,
    start=12966770, end=12966770) at lread.c:1934
#39 0x00000000005c0d4f in Feval_buffer (buffer=17665045,
printflag=12966770,
    filename=16200945, unibyte=12966770, do_allow_print=12966818)
    at lread.c:1995
#40 0x0000000000590702 in Ffuncall (nargs=6, args=0x7fffffffc5e8)
    at eval.c:2831
#41 0x00000000005d9ecb in exec_byte_code (bytestr=9101593, vector=9101629,
    maxdepth=24, args_template=12966770, nargs=0, args=0x0) at
bytecode.c:919
#42 0x0000000000591224 in funcall_lambda (fun=9101469, nargs=4,
    arg_vector=0x8ae13d <pure+178429>) at eval.c:3049
#43 0x0000000000590828 in Ffuncall (nargs=5, args=0x7fffffffcb80)
    at eval.c:2864
#44 0x00000000005900a9 in call4 (fn=13233138, arg1=16200945, arg2=16200945,
    arg3=12966770, arg4=12966818) at eval.c:2663
#45 0x00000000005bf0ce in Fload (file=12968289, noerror=12966770,
    nomessage=12966818, nosuffix=12966770, must_suffix=12966770)
    at lread.c:1305
#46 0x0000000000590702 in Ffuncall (nargs=4, args=0x7fffffffcf18)
    at eval.c:2831
#47 0x00000000005d9ecb in exec_byte_code (bytestr=9509777, vector=9509813,
    maxdepth=92, args_template=1028, nargs=1, args=0x7fffffffd468)
    at bytecode.c:919
#48 0x0000000000590e39 in funcall_lambda (fun=9509733, nargs=1,
    arg_vector=0x7fffffffd460) at eval.c:2983
#49 0x0000000000590828 in Ffuncall (nargs=2, args=0x7fffffffd458)
    at eval.c:2864
#50 0x00000000005d9ecb in exec_byte_code (bytestr=9483993, vector=9484029,
    maxdepth=68, args_template=0, nargs=0, args=0x7fffffffd9f8)
    at bytecode.c:919
#51 0x0000000000590e39 in funcall_lambda (fun=9483949, nargs=0,
    arg_vector=0x7fffffffd9f8) at eval.c:2983
#52 0x0000000000590828 in Ffuncall (nargs=1, args=0x7fffffffd9f0)
    at eval.c:2864
#53 0x00000000005d9ecb in exec_byte_code (bytestr=9480481, vector=9480517,
    maxdepth=48, args_template=0, nargs=0, args=0x7fffffffded0)
    at bytecode.c:919
#54 0x0000000000590e39 in funcall_lambda (fun=9480437, nargs=0,
    arg_vector=0x7fffffffded0) at eval.c:2983
#55 0x0000000000590b45 in apply_lambda (fun=9480437, args=12966770)
    at eval.c:2924
#56 0x000000000058f191 in eval_sub (form=13213110) at eval.c:2230
#57 0x000000000058e66c in Feval (form=13213110, lexical=12966770)
    at eval.c:2003
#58 0x00000000004eb0a4 in top_level_2 () at keyboard.c:1183
#59 0x000000000058ccfd in internal_condition_case (
    bfun=0x4eb087 <top_level_2>, handlers=13017586, hfun=0x4eab6d
<cmd_error>)
    at eval.c:1354
#60 0x00000000004eb0de in top_level_1 (ignore=12966770) at keyboard.c:1191
#61 0x000000000058c181 in internal_catch (tag=13013522,
    func=0x4eb0a6 <top_level_1>, arg=12966770) at eval.c:1118
#62 0x00000000004eaffd in command_loop () at keyboard.c:1152
#63 0x00000000004ea678 in recursive_edit_1 () at keyboard.c:777
#64 0x00000000004ea85d in Frecursive_edit () at keyboard.c:845
#65 0x00000000004e8748 in main (argc=5, argv=0x7fffffffe3a8) at emacs.c:1654

Lisp Backtrace:
"Automatic GC" (0xc51790)
0x1bf8068 PVEC_COMPILED
0x1bc0230 PVEC_COMPILED
"funcall" (0xffff9920)
"byte-compile-from-buffer" (0xffffa0f0)
"byte-compile-file" (0xffffa670)
"byte-recompile-file" (0xffffabb8)
0x112c3c8 PVEC_COMPILED
0x117a800 PVEC_COMPILED
"funcall" (0xffffb5f0)
"byte-recompile-directory" (0xffffbdd8)
"byte-force-recompile" (0xffffc270)
"eval-buffer" (0xffffc5f0)
"load-with-code-conversion" (0xffffcb88)
"load" (0xffffcf20)
"command-line-1" (0xffffd460)
"command-line" (0xffffd9f8)
"normal-top-level" (0xffffded0)



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03  7:55             ` Daniel Colascione
@ 2014-04-03  9:08               ` Daniel Colascione
  2014-04-03 14:03                 ` Dmitry Antipov
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03  9:08 UTC (permalink / raw)
  To: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1021 bytes --]

On 04/03/2014 12:55 AM, Daniel Colascione wrote:
> On 04/03/2014 12:04 AM, Dmitry Antipov wrote:
>> On 04/03/2014 10:59 AM, Dmitry Antipov wrote:
>>
>>> 3. Run 'emacs -Q', then M-x byte-force-recompile
>>>     /path/to/trunk/lis/org
>>                      ^^^^^^^
>> Mean /path/to/trunk/lisp/org, i.e. all Org mode.
> 
> Nice work. What gave you the idea of using byte-force-recompile to
> repro? I'd tried a few other stress cases myself and couldn't find
> anything. Your repro works perfectly.
> 

Found the bug: that symbol's name is in pure storage, so we ignore the
value of sym->s.gcmarkbit and assume the symbol is always live: we
never put it on the free list, so we never set its function slot to
Vdead. Later, during another GC pass, conservative GC scanning happens
to find a pointer to the symbol. We begin marking it, descend into the
function slot, which is still pointing to the old, dead object value. We
try to mark memory being used for some other purpose and enter la-la land.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03  9:08               ` Daniel Colascione
@ 2014-04-03 14:03                 ` Dmitry Antipov
  2014-04-03 15:42                   ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-03 14:03 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 668 bytes --]

On 04/03/2014 01:08 PM, Daniel Colascione wrote:

> Found the bug: that symbol's name is in pure storage, so we ignore the
> value of sym->s.gcmarkbit and assume the symbol is always live: we
> never put it on the free list, so we never set its function slot to
> Vdead. Later, during another GC pass, conservative GC scanning happens
> to find a pointer to the symbol. We begin marking it, descend into the
> function slot, which is still pointing to the old, dead object value. We
> try to mark memory being used for some other purpose and enter la-la land.

What about this workaround? Until we find a better solution,
this should prevent crashes at least.

Dmitry

[-- Attachment #2: bug17168_workaround.patch --]
[-- Type: text/x-patch, Size: 1477 bytes --]

=== modified file 'src/alloc.c'
--- src/alloc.c	2014-04-03 00:37:51 +0000
+++ src/alloc.c	2014-04-03 13:59:58 +0000
@@ -3382,6 +3382,13 @@
 
   CHECK_STRING (name);
 
+  /* If not loadup, avoid symbols with names from pure space.
+     Current GC has problems treating such symbols - see
+     http://debbugs.gnu.org/cgi/bugreport.cgi?bug=17168.  */
+  if (NILP (Vpurify_flag) && PURE_POINTER_P (XPNTR (name)))
+    name = make_specified_string (SSDATA (name), SCHARS (name),
+				  SBYTES (name), STRING_MULTIBYTE (name));
+
   MALLOC_BLOCK_INPUT;
 
   if (symbol_free_list)
@@ -6174,6 +6181,12 @@
 	  break;
 	CHECK_ALLOCATED_AND_LIVE (live_symbol_p);
 	ptr->gcmarkbit = 1;
+	/* Attempt to catch bogus objects.  In particular, see
+	   http://debbugs.gnu.org/cgi/bugreport.cgi?bug=17168.  */
+	eassert (SYMBOLP (ptr->function)
+		 || CONSP (ptr->function)
+		 || COMPILEDP (ptr->function)
+		 || SUBRP (ptr->function));
 	mark_object (ptr->function);
 	mark_object (ptr->plist);
 	switch (ptr->redirect)
@@ -6601,6 +6614,12 @@
               if (!pure_p)
                 eassert (!STRING_MARKED_P (XSTRING (sym->s.name)));
               sym->s.gcmarkbit = 0;
+	      /* Attempt to catch bogus objects.  In particular, see
+		 http://debbugs.gnu.org/cgi/bugreport.cgi?bug=17168.  */
+	      eassert (SYMBOLP (sym->s.function)
+		       || CONSP (sym->s.function)
+		       || COMPILEDP (sym->s.function)
+		       || SUBRP (sym->s.function));
             }
         }
 


^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 14:03                 ` Dmitry Antipov
@ 2014-04-03 15:42                   ` Stefan Monnier
  2014-04-03 16:47                     ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-03 15:42 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: 17168

> What about this workaround? Until we find a better solution,
> this should prevent crashes at least.

Let's try to find a better fix instead of another workaround around the
existing workaround.

So the existing workaround is here:

	    /* Check if the symbol was created during loadup.  In such a case
	       it might be pointed to by pure bytecode which we don't trace,
	       so we conservatively assume that it is live.  */
	    bool pure_p = PURE_POINTER_P (XSTRING (sym->s.name));

	    if (!sym->s.gcmarkbit && !pure_p)
	      {
		if (sym->s.redirect == SYMBOL_LOCALIZED)
		  xfree (SYMBOL_BLV (&sym->s));
		sym->s.next = symbol_free_list;
		symbol_free_list = &sym->s;
#if GC_MARK_STACK
		symbol_free_list->function = Vdead;
#endif
		++this_free;
	      }
	    else
	      {
		++num_used;
		if (!pure_p)
		  eassert (!STRING_MARKED_P (XSTRING (sym->s.name)));
		sym->s.gcmarkbit = 0;
	      }

I.e. any symbol with a pure name is assumed to be potentially reachable
from some pure objects.  Not only is this assumption wrong, but its
implementation is wrong as well: we just keep the symbol without making
sure we also keep the objects it points to.

Furthermore, in theory some pure object may very well point to a symbol
whose name was not made pure.  Worse, a pure object may point to several
other kinds of non-pure objects, so this special treatment we have for
symbols should really be applied to other "non-purifiable" objects.

How 'bout we change `purecopy' such that before doing

    /* Not purified, don't hash-cons.  */
    return obj;

it adds the object to a table of "objects pointed from pure space"?

This table should probably be a hash-table (for simplicity), and of
course we'd only add objects to it when the purecopy call was
a recursive call, not for toplevel calls (i.e. calling (purecopy
<process>) should not add <process> to the table since it's not pointed
to from a pure object, whereas (purecopy '(<process>)) should).


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 15:42                   ` Stefan Monnier
@ 2014-04-03 16:47                     ` Daniel Colascione
  2014-04-03 17:49                       ` Dmitry Antipov
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03 16:47 UTC (permalink / raw)
  To: Stefan Monnier, Dmitry Antipov; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 270 bytes --]

On 04/03/2014 08:42 AM, Stefan Monnier wrote:
>> What about this workaround? Until we find a better solution,
>> this should prevent crashes at least.

What about just eliminating the concept of pure storage? Is it really
worth the complexity on modern systems?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 16:47                     ` Daniel Colascione
@ 2014-04-03 17:49                       ` Dmitry Antipov
  2014-04-03 17:51                         ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-03 17:49 UTC (permalink / raw)
  To: Daniel Colascione, Stefan Monnier; +Cc: 17168

On 04/03/2014 08:47 PM, Daniel Colascione wrote:

> What about just eliminating the concept of pure storage? Is it really
> worth the complexity on modern systems?

Maybe; but now I'm thinking about just releasing pretest free from
serious known issues.

Dmitry






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 17:49                       ` Dmitry Antipov
@ 2014-04-03 17:51                         ` Daniel Colascione
  2014-04-03 19:21                           ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03 17:51 UTC (permalink / raw)
  To: Dmitry Antipov, Stefan Monnier; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 434 bytes --]

On 04/03/2014 10:49 AM, Dmitry Antipov wrote:
> On 04/03/2014 08:47 PM, Daniel Colascione wrote:
> 
>> What about just eliminating the concept of pure storage? Is it really
>> worth the complexity on modern systems?
> 
> Maybe; but now I'm thinking about just releasing pretest free from
> serious known issues.

Sure; I don't think it's too late to take pure storage out of 24.4
though. Pure storage is an optimization.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 17:51                         ` Daniel Colascione
@ 2014-04-03 19:21                           ` Stefan Monnier
  2014-04-03 19:22                             ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-03 19:21 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168

> Sure; I don't think it's too late to take pure storage out of 24.4

It is definitely too late for that.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 19:21                           ` Stefan Monnier
@ 2014-04-03 19:22                             ` Daniel Colascione
  2014-04-05 22:37                               ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-03 19:22 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 260 bytes --]

On 04/03/2014 12:21 PM, Stefan Monnier wrote:
>> Sure; I don't think it's too late to take pure storage out of 24.4
> 
> It is definitely too late for that.

Okay. Let's try your proposed solution then. I'll see whether I can code
something up today.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-03 19:22                             ` Daniel Colascione
@ 2014-04-05 22:37                               ` Daniel Colascione
  2014-04-06  5:05                                 ` Dmitry Antipov
  2014-04-06 12:36                                 ` Stefan Monnier
  0 siblings, 2 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-05 22:37 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 16519 bytes --]

On 04/03/2014 12:22 PM, Daniel Colascione wrote:
> On 04/03/2014 12:21 PM, Stefan Monnier wrote:
>>> Sure; I don't think it's too late to take pure storage out of 24.4
>>
>> It is definitely too late for that.
> 
> Okay. Let's try your proposed solution then. I'll see whether I can code
> something up today.

The patch came out more complicated than I'd hoped. Basically, we define
a new variable Vpure_reachable, accessible only from C. Early in
startup, we make it a plain list and cons reachable but non-pure objects
from Fpurecopy onto it. Once we have hash tables available, we turn it
into a hash table. At the end of loadup, instead of just setting
purify-flag to nil, we call a new subr finalize-pure-storage.

finalize-pure-storage sets purify-flag to nil by side effect and, as new
behavior, makes purify-flag constant so that it can never again become
non-nil. Before returning, finalize-pure-storage also turns
Vpure_reachable into a vector *in pure storage* of objects we need to
keep around. Fgarbage_collect knows how to mark objects in
Vpure_reachable and understands that if Vpure_reachable is a vector, its
contents should be marked, not the vector itself.

This scheme works and passes Dmitry's test, but the resulting
Vpure_reachable vector has over 8,000 items. Most of these items are
ordinary interned symbols. As an optimization, when we build the final
vector form of Vpure_reachable, we see whether each item is a symbol
interned in the initial obarray. If it is, then instead of adding it to
the vector, we mark the symbol as un-uninternable, and add code to
Funintern to look for this new flag. After this optimization,
Vpure_reachable only has 251 elements.

Please review.

=== modified file 'lisp/loadup.el'
--- lisp/loadup.el	2014-02-10 01:34:22 +0000
+++ lisp/loadup.el	2014-04-05 22:24:34 +0000
@@ -56,7 +56,7 @@
 	  t))
     (let ((dir (car load-path)))
       ;; We'll probably overflow the pure space.
-      (setq purify-flag nil)
+      (finalize-pure-storage)
       (setq load-path (list (expand-file-name "." dir)
 			    (expand-file-name "emacs-lisp" dir)
 			    (expand-file-name "language" dir)
@@ -389,12 +389,11 @@
     (message "Pure-hashed: %d strings, %d vectors, %d conses, %d bytecodes, %d others"
              strings vectors conses bytecodes others)))

-;; Avoid error if user loads some more libraries now and make sure the
-;; hash-consing hash table is GC'd.
-(setq purify-flag nil)
-
-(if (null (garbage-collect))
-    (setq pure-space-overflow t))
+;; Runs garbage-collect and sets purify-flag to nil by side effect.
+(when (and purify-flag
+           (progn (finalize-pure-storage)
+                  (not (garbage-collect))))
+  (setq pure-space-overflow t))

 (if (or (member (nth 3 command-line-args) '("dump" "bootstrap"))
 	(member (nth 4 command-line-args) '("dump" "bootstrap")))

=== modified file 'src/alloc.c'
--- src/alloc.c	2014-04-03 09:50:58 +0000
+++ src/alloc.c	2014-04-05 22:30:18 +0000
@@ -173,6 +173,14 @@
 static char *purebeg;
 static ptrdiff_t pure_size;

+/* Data structure holding non-pure objects reachable from objects in
+   pure storage.  Initially a list, since we need this data structure
+   before we've initialized enough of Emacs to make hash tables.  We
+   transform it into a hash table when hash tables become available.
+   In `finalize-pure-storage', we turn Vpure_reachable into a vector in
+   pure storage.  */
+static Lisp_Object Vpure_reachable;
+
 /* Number of bytes of pure storage used before pure storage overflowed.
    If this is non-zero, this implies that an overflow occurred.  */

@@ -196,6 +204,8 @@

 const char *pending_malloc_warning;

+static Lisp_Object purecopy_1 (Lisp_Object obj, bool top_level);
+
 #if 0 /* Normally, pointer sanity only on request... */
 #ifdef ENABLE_CHECKING
 #define SUSPICIOUS_OBJECT_CHECKING 1
@@ -5228,8 +5238,8 @@
   Lisp_Object new;
   struct Lisp_Cons *p = pure_alloc (sizeof *p, Lisp_Cons);
   XSETCONS (new, p);
-  XSETCAR (new, Fpurecopy (car));
-  XSETCDR (new, Fpurecopy (cdr));
+  XSETCAR (new, purecopy_1 (car, false));
+  XSETCDR (new, purecopy_1 (cdr, false));
   return new;
 }

@@ -5261,12 +5271,8 @@
   return new;
 }

-
-DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0,
-       doc: /* Make a copy of object OBJ in pure storage.
-Recursively copies contents of vectors and cons cells.
-Does not copy symbols.  Copies strings without text properties.  */)
-  (register Lisp_Object obj)
+static Lisp_Object
+purecopy_1 (Lisp_Object obj, bool top_level)
 {
   if (NILP (Vpurify_flag))
     return obj;
@@ -5300,7 +5306,7 @@
 	size &= PSEUDOVECTOR_SIZE_MASK;
       vec = XVECTOR (make_pure_vector (size));
       for (i = 0; i < size; i++)
-	vec->contents[i] = Fpurecopy (AREF (obj, i));
+	vec->contents[i] = purecopy_1 (AREF (obj, i), false);
       if (COMPILEDP (obj))
 	{
 	  XSETPVECTYPE (vec, PVEC_COMPILED);
@@ -5311,9 +5317,20 @@
     }
   else if (MARKERP (obj))
     error ("Attempt to copy a marker to pure storage");
-  else
+  else if (top_level)
     /* Not purified, don't hash-cons.  */
     return obj;
+  else if (!INTEGERP (obj) && !EQ (obj, Qt) && !EQ (obj, Qnil))
+    {
+      /* Object is reachable from a pure object, so we need to remember
+         it as a GC root: we don't mark pure objects themselves.  */
+      if (NILP (Vpure_reachable) || CONSP (Vpure_reachable))
+        Vpure_reachable = Fcons (obj, Vpure_reachable);
+      else
+        Fputhash (obj, Qnil, Vpure_reachable);
+
+      return obj;
+    }

   if (HASH_TABLE_P (Vpurify_flag)) /* Hash consing.  */
     Fputhash (obj, obj, Vpurify_flag);
@@ -5322,6 +5339,73 @@
 }


+DEFUN ("purecopy", Fpurecopy, Spurecopy, 1, 1, 0,
+       doc: /* Make a copy of object OBJ in pure storage.
+Recursively copies contents of vectors and cons cells.
+Does not copy symbols.  Copies strings without text properties.  */)
+  (register Lisp_Object obj)
+{
+  return purecopy_1 (obj, true);
+}
+
+DEFUN ("finalize-pure-storage", Ffinalize_pure_storage,
+       Sfinalize_pure_storage, 0, 0, 0,
+       doc: /* Finishes building pure storage.
+May be called only once, with purify-flag non-nil.  */)
+     (void)
+{
+  struct Lisp_Hash_Table *h;
+  ptrdiff_t nr_reachable;
+  Lisp_Object new_pure_reachable;
+  Lisp_Object reachable_object;
+  ptrdiff_t i;
+  Lisp_Object reachable_objects;
+
+  if (NILP (Vpurify_flag))
+    error ("Purification not started");
+
+  eassert (HASH_TABLE_P (Vpure_reachable));
+  h = XHASH_TABLE (Vpure_reachable);
+
+  reachable_objects = Qnil;
+  nr_reachable = 0;
+
+  for (i = 0; i < HASH_TABLE_SIZE (h); ++i)
+    if (!NILP (HASH_HASH (h, i)))
+      {
+        reachable_object = HASH_KEY (h, i);
+        if (SYMBOLP (reachable_object))
+          {
+            if (SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (reachable_object))
+              XSYMBOL (reachable_object)->interned =
+                SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN;
+
+            if (XSYMBOL (reachable_object)->interned
+                == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN)
+              {
+                /* No need to remember this object, since it's already
+                   on the main obarray and won't be uninterned.  */
+                continue;
+              }
+          }
+
+        nr_reachable += 1;
+        reachable_objects = Fcons (reachable_object, reachable_objects);
+      }
+
+  new_pure_reachable = make_pure_vector (nr_reachable);
+  for (i = 0; CONSP (reachable_objects); ++i)
+    {
+      XVECTOR (new_pure_reachable)->contents[i] = XCAR (reachable_objects);
+      reachable_objects = XCDR (reachable_objects);
+    }
+
+  XSYMBOL (intern_c_string ("purify-flag"))->constant = 1;
+  Vpurify_flag = Qnil;
+  Vpure_reachable = new_pure_reachable;
+  return Qnil;
+}
+
 \f
 /***********************************************************************
 			  Protection from GC
@@ -5578,6 +5662,19 @@
   for (i = 0; i < staticidx; i++)
     mark_object (*staticvec[i]);

+  if (VECTORP (Vpure_reachable))
+    {
+      /* Vpure_reachable is a pure-allocated vector of objects
+         reachable from pure storage.  We can't mark it, but we can
+         mark its contents.  */
+      struct Lisp_Vector* pv = XVECTOR (Vpure_reachable);
+      eassert (PURE_POINTER_P (pv));
+      for (i = 0; i < pv->header.size; ++i)
+        mark_object (pv->contents[i]);
+    }
+  else
+    mark_object (Vpure_reachable);
+
   mark_specpdl ();
   mark_terminals ();
   mark_kboards ();
@@ -6581,12 +6678,7 @@

       for (; sym < end; ++sym)
         {
-          /* Check if the symbol was created during loadup.  In such a case
-             it might be pointed to by pure bytecode which we don't trace,
-             so we conservatively assume that it is live.  */
-          bool pure_p = PURE_POINTER_P (XSTRING (sym->s.name));
-
-          if (!sym->s.gcmarkbit && !pure_p)
+          if (!sym->s.gcmarkbit)
             {
               if (sym->s.redirect == SYMBOL_LOCALIZED)
                 xfree (SYMBOL_BLV (&sym->s));
@@ -6600,8 +6692,6 @@
           else
             {
               ++num_used;
-              if (!pure_p)
-                eassert (!STRING_MARKED_P (XSTRING (sym->s.name)));
               sym->s.gcmarkbit = 0;
               /* Attempt to catch bogus objects.  */
               eassert (valid_lisp_object_p (sym->s.function) >= 1);
@@ -6922,6 +7012,9 @@
   /* Used to do Vpurify_flag = Qt here, but Qt isn't set up yet!  */
   purebeg = PUREBEG;
   pure_size = PURESIZE;
+#ifdef ENABLE_CHECKING
+  Vpure_reachable = make_number (-1);
+#endif

 #if GC_MARK_STACK || defined GC_MALLOC_CHECK
   mem_init ();
@@ -6941,6 +7034,39 @@
 }

 void
+init_alloc_once_post_obarray (void)
+{
+  /* This function is called after Qnil and Qt make sense.  Qt is
+  correct even if CANNOT_DUMP.  loadup.el will set to nil at end.  */
+  Vpurify_flag = Qt;
+  Vpure_reachable = Qnil;
+  /* We don't need to staticpro Vpure_reachable as we mark it specially
+     in Fgarbage_collect.  */
+}
+
+void
+init_alloc_once_post_hash_tables (void)
+{
+  /* This function is called after hash tables become available.  Make
+     Vpure_reachable a hash table for more efficiency.  */
+  Lisp_Object reachable_list = Vpure_reachable;
+  Lisp_Object new_pure_reachable =
+    make_hash_table (hashtest_eq,
+                     make_number (DEFAULT_HASH_SIZE),
+                     make_float (DEFAULT_REHASH_SIZE),
+                     make_float (DEFAULT_REHASH_THRESHOLD),
+                     Qnil);
+
+  while (CONSP (reachable_list))
+    {
+      Fputhash (XCAR (reachable_list), Qnil, new_pure_reachable);
+      reachable_list = XCDR (reachable_list);
+    }
+
+  Vpure_reachable = new_pure_reachable;
+}
+
+void
 init_alloc (void)
 {
   gcprolist = 0;
@@ -7068,6 +7194,7 @@
   defsubr (&Smake_symbol);
   defsubr (&Smake_marker);
   defsubr (&Spurecopy);
+  defsubr (&Sfinalize_pure_storage);
   defsubr (&Sgarbage_collect);
   defsubr (&Smemory_limit);
   defsubr (&Smemory_use_counts);

=== modified file 'src/emacs.c'
--- src/emacs.c	2014-04-03 07:14:02 +0000
+++ src/emacs.c	2014-04-05 20:33:09 +0000
@@ -1171,6 +1171,7 @@
     {
       init_alloc_once ();
       init_obarray ();
+      init_alloc_once_post_obarray ();
       init_eval_once ();
       init_charset_once ();
       init_coding_once ();
@@ -1198,6 +1199,7 @@
       /* Called before syms_of_fileio, because it sets up Qerror_condition.  */
       syms_of_data ();
       syms_of_fns ();	   /* Before syms_of_charset which uses hashtables.  */
+      init_alloc_once_post_hash_tables ();
       syms_of_fileio ();
       /* Before syms_of_coding to initialize Vgc_cons_threshold.  */
       syms_of_alloc ();
@@ -2078,7 +2080,6 @@
 You must run Emacs in batch mode in order to dump it.  */)
   (Lisp_Object filename, Lisp_Object symfile)
 {
-  Lisp_Object tem;
   Lisp_Object symbol;
   ptrdiff_t count = SPECPDL_INDEX ();

@@ -2090,6 +2091,9 @@
   if (!might_dump)
     error ("Emacs can be dumped only once");

+  if (!NILP (Vpurify_flag))
+    error ("Purification must have completed before dumping");
+
 #ifdef GNU_LINUX

   /* Warn if the gap between BSS end and heap start is larger than this.  */
@@ -2127,9 +2131,6 @@
 	}
     }

-  tem = Vpurify_flag;
-  Vpurify_flag = Qnil;
-
 #ifdef HAVE_TZSET
   set_time_zone_rule (dump_tz);
 #ifndef LOCALTIME_CACHE
@@ -2173,8 +2174,6 @@
   reset_image_types ();
 #endif

-  Vpurify_flag = tem;
-
   return unbind_to (count, Qnil);
 }


=== modified file 'src/fns.c'
--- src/fns.c	2014-04-01 20:18:12 +0000
+++ src/fns.c	2014-04-05 21:39:19 +0000
@@ -3483,8 +3483,9 @@
 			 Low-level Functions
  ***********************************************************************/

-static struct hash_table_test hashtest_eq;
-struct hash_table_test hashtest_eql, hashtest_equal;
+struct hash_table_test hashtest_eq;
+struct hash_table_test hashtest_eql;
+struct hash_table_test hashtest_equal;

 /* Compare KEY1 which has hash code HASH1 and KEY2 with hash code
    HASH2 in hash table H using `eql'.  Value is true if KEY1 and

=== modified file 'src/lisp.h'
--- src/lisp.h	2014-04-03 00:18:08 +0000
+++ src/lisp.h	2014-04-05 22:13:57 +0000
@@ -1537,7 +1537,8 @@
 {
   SYMBOL_UNINTERNED = 0,
   SYMBOL_INTERNED = 1,
-  SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2
+  SYMBOL_INTERNED_IN_INITIAL_OBARRAY = 2,
+  SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN = 3
 };

 enum symbol_redirect
@@ -1658,7 +1659,14 @@
 INLINE bool
 SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (Lisp_Object sym)
 {
-  return XSYMBOL (sym)->interned == SYMBOL_INTERNED_IN_INITIAL_OBARRAY;
+  return XSYMBOL (sym)->interned >= SYMBOL_INTERNED_IN_INITIAL_OBARRAY;
+}
+
+INLINE bool
+SYMBOL_CANNOT_UNINTERN_P (Lisp_Object sym)
+{
+  return XSYMBOL (sym)->interned ==
+    SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN;
 }

 /* Value is non-zero if symbol is considered a constant, i.e. its
@@ -3450,7 +3458,7 @@
 ptrdiff_t hash_lookup (struct Lisp_Hash_Table *, Lisp_Object, EMACS_UINT *);
 ptrdiff_t hash_put (struct Lisp_Hash_Table *, Lisp_Object, Lisp_Object,
 		    EMACS_UINT);
-extern struct hash_table_test hashtest_eql, hashtest_equal;
+extern struct hash_table_test hashtest_eq, hashtest_eql, hashtest_equal;

 extern Lisp_Object substring_both (Lisp_Object, ptrdiff_t, ptrdiff_t,
 				   ptrdiff_t, ptrdiff_t);
@@ -3741,6 +3749,8 @@
 extern void free_marker (Lisp_Object);
 extern void free_cons (struct Lisp_Cons *);
 extern void init_alloc_once (void);
+extern void init_alloc_once_post_obarray (void);
+extern void init_alloc_once_post_hash_tables (void);
 extern void init_alloc (void);
 extern void syms_of_alloc (void);
 extern struct buffer * allocate_buffer (void);

=== modified file 'src/lread.c'
--- src/lread.c	2014-02-25 22:51:34 +0000
+++ src/lread.c	2014-04-05 22:11:09 +0000
@@ -3895,10 +3895,17 @@
   if (SYMBOLP (name) && !EQ (name, tem))
     return Qnil;

-  /* There are plenty of other symbols which will screw up the Emacs
-     session if we unintern them, as well as even more ways to use
-     `setq' or `fset' or whatnot to make the Emacs session
-     unusable.  Let's not go down this silly road.  --Stef  */
+  if (XSYMBOL (tem)->interned
+      == SYMBOL_INTERNED_IN_INITIAL_OBARRAY_CANNOT_UNINTERN)
+    {
+      /* We can't unintern this symbol because pure storage might
+         refer to it.  If we were to allow uninterning, we'd have to
+         remember these symbols as GC roots elsewhere, and if the user
+         later re-interned them, the core functionality would refer to
+         symbols with a different name.  */
+      error ("Attempt to unintern symbol in Emacs core");
+    }
+
   /* if (EQ (tem, Qnil) || EQ (tem, Qt))
        error ("Attempt to unintern t or nil"); */

@@ -4052,9 +4059,6 @@
   XSYMBOL (Qnil)->declared_special = 1;
   XSYMBOL (Qt)->constant = 1;

-  /* Qt is correct even if CANNOT_DUMP.  loadup.el will set to nil at end.  */
-  Vpurify_flag = Qt;
-
   DEFSYM (Qvariable_documentation, "variable-documentation");

   read_buffer = xmalloc (size);



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-05 22:37                               ` Daniel Colascione
@ 2014-04-06  5:05                                 ` Dmitry Antipov
  2014-04-06  5:11                                   ` Daniel Colascione
  2014-04-06 12:36                                 ` Stefan Monnier
  1 sibling, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-06  5:05 UTC (permalink / raw)
  To: Daniel Colascione, Stefan Monnier; +Cc: 17168

On 04/06/2014 02:37 AM, Daniel Colascione wrote:

> The patch came out more complicated than I'd hoped.

My first impression is that a patch to remove pure storage altogether would have nearly
the same size and complexity. I.e. the whole thing becomes grossly overengineered.

Dmitry






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06  5:05                                 ` Dmitry Antipov
@ 2014-04-06  5:11                                   ` Daniel Colascione
  2014-04-06 18:00                                     ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06  5:11 UTC (permalink / raw)
  To: Dmitry Antipov, Stefan Monnier; +Cc: 17168

[-- Attachment #1: Type: text/plain, Size: 424 bytes --]

On 04/05/2014 10:05 PM, Dmitry Antipov wrote:
> I.e. the whole thing becomes grossly
> overengineered.

Compared to what? We can't leave the bug in the code. We can't go back
to precise GC. The pure symbol-name thing is a hacky workaround, and
there may be other bugs lurking. It's either this or removing pure
storage, and at least this code is already written and maintains
resource consumption at 24.3 levels.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-05 22:37                               ` Daniel Colascione
  2014-04-06  5:05                                 ` Dmitry Antipov
@ 2014-04-06 12:36                                 ` Stefan Monnier
  2014-04-06 15:06                                   ` Eli Zaretskii
                                                     ` (2 more replies)
  1 sibling, 3 replies; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 12:36 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168

> This scheme works and passes Dmitry's test, but the resulting
> Vpure_reachable vector has over 8,000 items. Most of these items are
> ordinary interned symbols.

What objects are there besides symbols in Vpure_reachable?
If we can reduce Vpure_reachable to only contain symbols, then we can
replace it with a `pinned' bit in the Lisp_Symbol struct and then walk
the list of symbols during mark, marking all those symbols with the
`pinned' bit.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 12:36                                 ` Stefan Monnier
@ 2014-04-06 15:06                                   ` Eli Zaretskii
  2014-04-06 15:59                                     ` Daniel Colascione
  2014-04-06 19:42                                     ` Stefan Monnier
  2014-04-06 15:46                                   ` Daniel Colascione
  2014-04-06 18:01                                   ` Richard Stallman
  2 siblings, 2 replies; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-06 15:06 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: 17168, dmantipov

> From: Stefan Monnier <monnier@IRO.UMontreal.CA>
> Date: Sun, 06 Apr 2014 08:36:02 -0400
> Cc: Dmitry Antipov <dmantipov@yandex.ru>, 17168@debbugs.gnu.org
> 
> > This scheme works and passes Dmitry's test, but the resulting
> > Vpure_reachable vector has over 8,000 items. Most of these items are
> > ordinary interned symbols.
> 
> What objects are there besides symbols in Vpure_reachable?
> If we can reduce Vpure_reachable to only contain symbols, then we can
> replace it with a `pinned' bit in the Lisp_Symbol struct and then walk
> the list of symbols during mark, marking all those symbols with the
> `pinned' bit.

As an alternative, would it make sense to try to understand why the
problems started when they did?  IOW, how come we never saw this until
now?

In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
provided the last good revno (113938) and the first bad one (114268);
I looked at that range of revisions, and 114156 looks relevant.  How
about if we revert it and see if the problems go away?





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 12:36                                 ` Stefan Monnier
  2014-04-06 15:06                                   ` Eli Zaretskii
@ 2014-04-06 15:46                                   ` Daniel Colascione
  2014-04-06 19:58                                     ` Stefan Monnier
  2014-04-06 18:01                                   ` Richard Stallman
  2 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 15:46 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 975 bytes --]

On 04/06/2014 05:36 AM, Stefan Monnier wrote:
>> This scheme works and passes Dmitry's test, but the resulting
>> Vpure_reachable vector has over 8,000 items. Most of these items are
>> ordinary interned symbols.
> 
> What objects are there besides symbols in Vpure_reachable?

Just symbols for me.

> If we can reduce Vpure_reachable to only contain symbols, then we can
> replace it with a `pinned' bit in the Lisp_Symbol struct and then walk
> the list of symbols during mark, marking all those symbols with the
> `pinned' bit.

The pinned bit approach is exactly what I implemented, except that we
walk obarray, like we already do, instead of all symbols. Your approach
would require that we check for non-symbols in purecopy and reject them,
and it'd have a bigger performance impact, since we'd then need to walk
the entire symbol list essentially twice.

I'd strongly prefer the fully general approach in my patch. It isn't
*that* complicated.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 15:06                                   ` Eli Zaretskii
@ 2014-04-06 15:59                                     ` Daniel Colascione
  2014-04-06 16:19                                       ` Eli Zaretskii
  2014-04-06 19:42                                     ` Stefan Monnier
  1 sibling, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 15:59 UTC (permalink / raw)
  To: Eli Zaretskii, Stefan Monnier; +Cc: dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1549 bytes --]

On 04/06/2014 08:06 AM, Eli Zaretskii wrote:
>> From: Stefan Monnier <monnier@IRO.UMontreal.CA>
>> Date: Sun, 06 Apr 2014 08:36:02 -0400
>> Cc: Dmitry Antipov <dmantipov@yandex.ru>, 17168@debbugs.gnu.org
>>
>>> This scheme works and passes Dmitry's test, but the resulting
>>> Vpure_reachable vector has over 8,000 items. Most of these items are
>>> ordinary interned symbols.
>>
>> What objects are there besides symbols in Vpure_reachable?
>> If we can reduce Vpure_reachable to only contain symbols, then we can
>> replace it with a `pinned' bit in the Lisp_Symbol struct and then walk
>> the list of symbols during mark, marking all those symbols with the
>> `pinned' bit.
> 
> As an alternative, would it make sense to try to understand why the
> problems started when they did?  IOW, how come we never saw this until
> now?

Who knows? The problem arises when we happen to form a pointer on the stack
to an undead symbol, and *any* code change could be responsible for our
doing that more frequently. I don't see how you can blame it on 114156.

> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
> provided the last good revno (113938) and the first bad one (114268);
> I looked at that range of revisions, and 114156 looks relevant.  How
> about if we revert it and see if the problems go away?

The bug would still be there, and we'd have no way to tell whether your
proposed change actually reduced its occurrence to a tolerable level.
Why would you want to do that instead of just fixing the bug?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 15:59                                     ` Daniel Colascione
@ 2014-04-06 16:19                                       ` Eli Zaretskii
  2014-04-06 16:24                                         ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-06 16:19 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: dmantipov, 17168

> Date: Sun, 06 Apr 2014 08:59:55 -0700
> From: Daniel Colascione <dancol@dancol.org>
> CC: dmantipov@yandex.ru, 17168@debbugs.gnu.org
> 
> > As an alternative, would it make sense to try to understand why the
> > problems started when they did?  IOW, how come we never saw this until
> > now?
> 
> Who knows? The problem arises we happen to form a pointer on the stack
> to an undead symbol, and *any* code change could be responsible for our
> doing that more frequently. I don't see you can blame it on 114156.

Then how do you explain that we never saw such problems, in all the
years before?

> > In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
> > provided the last good revno (113938) and the first bad one (114268);
> > I looked at that range of revisions, and 114156 looks relevant.  How
> > about if we revert it and see if the problems go away?
> 
> The bug would still be there, and we'd have no way to tell whether your
> proposed change actually reduced its occurrence to a tolerable level.
> Why would you want to do that instead of just fixing the bug?

Because it's simpler, and because it just might be that the bug was
caused by that other changeset.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:19                                       ` Eli Zaretskii
@ 2014-04-06 16:24                                         ` Daniel Colascione
  2014-04-06 16:29                                           ` Eli Zaretskii
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 16:24 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1464 bytes --]

On 04/06/2014 09:19 AM, Eli Zaretskii wrote:
>> Date: Sun, 06 Apr 2014 08:59:55 -0700
>> From: Daniel Colascione <dancol@dancol.org>
>> CC: dmantipov@yandex.ru, 17168@debbugs.gnu.org
>>
>>> As an alternative, would it make sense to try to understand why the
>>> problems started when they did?  IOW, how come we never saw this until
>>> now?
>>
>> Who knows? The problem arises we happen to form a pointer on the stack
>> to an undead symbol, and *any* code change could be responsible for our
>> doing that more frequently. I don't see you can blame it on 114156.
> 
> Then how do you explain that we never saw such problems, in all the
> years before?

It's probabilistic. How do you know we didn't?

>>> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
>>> provided the last good revno (113938) and the first bad one (114268);
>>> I looked at that range of revisions, and 114156 looks relevant.  How
>>> about if we revert it and see if the problems go away?
>>
>> The bug would still be there, and we'd have no way to tell whether your
>> proposed change actually reduced its occurrence to a tolerable level.
>> Why would you want to do that instead of just fixing the bug?
> 
> Because it's simpler,

It's easy to make code that's simple and wrong.

> and because it just might be that the bug was
> caused by that other changeset.

How might that changeset in particular have caused the problem reports?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:24                                         ` Daniel Colascione
@ 2014-04-06 16:29                                           ` Eli Zaretskii
  2014-04-06 16:37                                             ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-06 16:29 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: dmantipov, 17168

> Date: Sun, 06 Apr 2014 09:24:01 -0700
> From: Daniel Colascione <dancol@dancol.org>
> CC: monnier@IRO.UMontreal.CA, dmantipov@yandex.ru, 17168@debbugs.gnu.org
> 
> > Then how do you explain that we never saw such problems, in all the
> > years before?
> 
> It's probabilistic. How do you know we didn't?

Because Richard has been using that machine for years, and I very much
doubt that he changed his usage patterns lately.

> >>> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
> >>> provided the last good revno (113938) and the first bad one (114268);
> >>> I looked at that range of revisions, and 114156 looks relevant.  How
> >>> about if we revert it and see if the problems go away?
> >>
> >> The bug would still be there, and we'd have no way to tell whether your
> >> proposed change actually reduced its occurrence to a tolerable level.
> >> Why would you want to do that instead of just fixing the bug?
> > 
> > Because it's simpler,
> 
> It's easy to make code that's simple and wrong.

I didn't suggest any new code.

> > and because it just might be that the bug was
> > caused by that other changeset.
> 
> How might that changeset in particular have caused the problem reports?

It is related to calling a function, and is in the same function from
which all the recent crashes started.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:29                                           ` Eli Zaretskii
@ 2014-04-06 16:37                                             ` Daniel Colascione
  2014-04-06 16:59                                               ` Eli Zaretskii
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 16:37 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 2102 bytes --]

On 04/06/2014 09:29 AM, Eli Zaretskii wrote:
>> Date: Sun, 06 Apr 2014 09:24:01 -0700
>> From: Daniel Colascione <dancol@dancol.org>
>> CC: monnier@IRO.UMontreal.CA, dmantipov@yandex.ru, 17168@debbugs.gnu.org
>>
>>> Then how do you explain that we never saw such problems, in all the
>>> years before?
>>
>> It's probabilistic. How do you know we didn't?
> 
> Because Richard has been using that machine for years, and I very much
> doubt that he changed his usage patterns lately.

Richard's not the only one who has seen this crash. Drew's also reported
GC crashes in odd, and different, places.

>>>>> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
>>>>> provided the last good revno (113938) and the first bad one (114268);
>>>>> I looked at that range of revisions, and 114156 looks relevant.  How
>>>>> about if we revert it and see if the problems go away?
>>>>
>>>> The bug would still be there, and we'd have no way to tell whether your
>>>> proposed change actually reduced its occurrence to a tolerable level.
>>>> Why would you want to do that instead of just fixing the bug?
>>>
>>> Because it's simpler,
>>
>> It's easy to make code that's simple and wrong.
> 
> I didn't suggest any new code.

No: you're just suggesting leaving incorrect code in Emacs.

>>> and because it just might be that the bug was
>>> caused by that other changeset.
>>
>> How might that changeset in particular have caused the problem reports?
> 
> It is related to calling a function, and is in the same function from
> which all the recent crashes started.

You haven't identified a causal mechanism. Any recent change could have
caused enough of a shift in code generation or stack layout to cause
this problem, and because it manifests so seldom, it'd be hard to verify
that reverting any particular change "fixed" the problem.

Also, eval_sub does *everything*. It's no surprise that we saw the
crashes there. That's like saying "all crashes are associated with main,
this change affects main, and therefore this change is responsible."


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:37                                             ` Daniel Colascione
@ 2014-04-06 16:59                                               ` Eli Zaretskii
  2014-04-06 17:11                                                 ` Daniel Colascione
  2014-04-06 19:44                                                 ` Stefan Monnier
  0 siblings, 2 replies; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-06 16:59 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: dmantipov, 17168

> Date: Sun, 06 Apr 2014 09:37:23 -0700
> From: Daniel Colascione <dancol@dancol.org>
> CC: monnier@IRO.UMontreal.CA, dmantipov@yandex.ru, 17168@debbugs.gnu.org
> 
> > Because Richard has been using that machine for years, and I very much
> > doubt that he changed his usage patterns lately.
> 
> Richard's not the only one who has seen this crash. Drew's also reported
> GC crashes in odd, and different, places.

Which seem unrelated, and started much later than Richard reported
his.

> >>>>> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
> >>>>> provided the last good revno (113938) and the first bad one (114268);
> >>>>> I looked at that range of revisions, and 114156 looks relevant.  How
> >>>>> about if we revert it and see if the problems go away?
> >>>>
> >>>> The bug would still be there, and we'd have no way to tell whether your
> >>>> proposed change actually reduced its occurrence to a tolerable level.
> >>>> Why would you want to do that instead of just fixing the bug?
> >>>
> >>> Because it's simpler,
> >>
> >> It's easy to make code that's simple and wrong.
> > 
> > I didn't suggest any new code.
> 
> No: you're just suggesting leaving incorrect code in Emacs.

It's not incorrect, AFAIU.  It might be less optimal.

> >>> and because it just might be that the bug was
> >>> caused by that other changeset.
> >>
> >> How might that changeset in particular have caused the problem reports?
> > 
> > It is related to calling a function, and is in the same function from
> > which all the recent crashes started.
> 
> You haven't identified a causal mechanism. Any recent change could have
> caused enough of a shift in code generation or stack layout to cause
> this problem, and because it manifests so seldom, it'd be hard to verify
> that reverting any particular change "fixed" the problem.

I thought you had a test case.  If not, how did you verify that your
suggested changes do fix the problem?

> Also, eval_sub does *everything*. It's no surprise that we saw the
> crashes there. That's like saying "all crashes are associated with main,
> this change affects main, and therefore this change is responsible."

The change is related to calling a function whose symbol has certain
properties.  That sounds related to me, not just a random change
somewhere in eval_sub.

Anyway, it was just an idea which I thought would be easy to try.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:59                                               ` Eli Zaretskii
@ 2014-04-06 17:11                                                 ` Daniel Colascione
  2014-04-06 19:44                                                 ` Stefan Monnier
  1 sibling, 0 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 17:11 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 2946 bytes --]

On 04/06/2014 09:59 AM, Eli Zaretskii wrote:
>> Date: Sun, 06 Apr 2014 09:37:23 -0700
>> From: Daniel Colascione <dancol@dancol.org>
>> CC: monnier@IRO.UMontreal.CA, dmantipov@yandex.ru, 17168@debbugs.gnu.org
>>
>>> Because Richard has been using that machine for years, and I very much
>>> doubt that he changed his usage patterns lately.
>>
>> Richard's not the only one who has seen this crash. Drew's also reported
>> GC crashes in odd, and different, places.
> 
> Which seem unrelated, and started much later than Richard reported
> his.

With a bug like this, unpredictable, usage-pattern-dependent behavior is
expected.

>>>>>>> In http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15583#23, Richard
>>>>>>> provided the last good revno (113938) and the first bad one (114268);
>>>>>>> I looked at that range of revisions, and 114156 looks relevant.  How
>>>>>>> about if we revert it and see if the problems go away?
>>>>>>
>>>>>> The bug would still be there, and we'd have no way to tell whether your
>>>>>> proposed change actually reduced its occurrence to a tolerable level.
>>>>>> Why would you want to do that instead of just fixing the bug?
>>>>>
>>>>> Because it's simpler,
>>>>
>>>> It's easy to make code that's simple and wrong.
>>>
>>> I didn't suggest any new code.
>>
>> No: you're just suggesting leaving incorrect code in Emacs.
> 
> It's not incorrect, AFAIU.  It might be less optimal.

The current code isn't just sub-optimal. It's wrong. If you get unlucky
and try to mark a dead symbol, you will crash.

>>>>> and because it just might be that the bug was
>>>>> caused by that other changeset.
>>>>
>>>> How might that changeset in particular have caused the problem reports?
>>>
>>> It is related to calling a function, and is in the same function from
>>> which all the recent crashes started.
>>
>> You haven't identified a causal mechanism. Any recent change could have
>> caused enough of a shift in code generation or stack layout to cause
>> this problem, and because it manifests so seldom, it'd be hard to verify
>> that reverting any particular change "fixed" the problem.
> 
> I thought you had a test case.  If not, how did you verify that your
> suggested changes do fix the problem?

There is a test. Your proposed change does not cause the test to pass.
Even if it did, I would argue against substituting a real fix with your
change.

>> Also, eval_sub does *everything*. It's no surprise that we saw the
>> crashes there. That's like saying "all crashes are associated with main,
>> this change affects main, and therefore this change is responsible."
> 
> The change is related to calling a function whose symbol has certain
> properties.  That sounds related to me, not just a random change
> somewhere in eval_sub.

It's a dangling pointer. Changing slightly the way we chase that
dangling pointer won't change the overall result.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06  5:11                                   ` Daniel Colascione
@ 2014-04-06 18:00                                     ` Richard Stallman
  2014-04-06 18:10                                       ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2014-04-06 18:00 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: dmantipov, 17168

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

Thanks for writing a fix.  I think it can be simpler.

You accumulate a list of uninterned symbols whose names are pure.  Why
make this into a hash table and then a vector?  A list should suffice.

Or maybe some (or even all) uninterned symbols with pure string names
should be freed like all other symbols when not pointed to.  That
would be even simpler.

Is there really a need to avoid collecting some of them?

    As an optimization, when we build the final
    vector form of Vpure_reachable, we see whether each item is a symbol
    interned in the initial obarray. If it is, then instead of adding it to
    the vector, we mark the symbol as un-uninternable,

Or unintern could check whether the name is pure.

-- 
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org  www.gnu.org
Skype: No way! That's nonfree (freedom-denying) software.
  Use Ekiga or an ordinary phone call.






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 12:36                                 ` Stefan Monnier
  2014-04-06 15:06                                   ` Eli Zaretskii
  2014-04-06 15:46                                   ` Daniel Colascione
@ 2014-04-06 18:01                                   ` Richard Stallman
  2014-04-06 19:58                                     ` Stefan Monnier
  2 siblings, 1 reply; 59+ messages in thread
From: Richard Stallman @ 2014-04-06 18:01 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: 17168, dmantipov

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

    If we can reduce Vpure_reachable to only contain symbols, then we can
    replace it with a `pinned' bit in the Lisp_Symbol struct and then walk
    the list of symbols during mark, marking all those symbols with the
    `pinned' bit.

It might be faster just to traverse a list of these symbols, since
there are not very many uninterned symbols with names that are pure.

-- 
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org  www.gnu.org
Skype: No way! That's nonfree (freedom-denying) software.
  Use Ekiga or an ordinary phone call.






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 18:00                                     ` Richard Stallman
@ 2014-04-06 18:10                                       ` Daniel Colascione
  2014-04-06 19:06                                         ` Eli Zaretskii
  2014-04-07  7:49                                         ` martin rudalics
  0 siblings, 2 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 18:10 UTC (permalink / raw)
  To: rms; +Cc: dmantipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1663 bytes --]

On 04/06/2014 11:00 AM, Richard Stallman wrote:
> You accumulate a list of uninterned symbols whose names are pure.  Why
> make this into a hash table 

To eliminate duplicates, of which there would otherwise be many.

> and then a vector?  

Because that's the best structure to fit in pure storage: the set of
needed symbols never changes, so why *not* turn it into a vector?

> A list should suffice.

No, it really doesn't.

> Or maybe some (or even all) uninterned symbols with pure string names
> should be freed like all other symbols when not pointed to. 

And how do you tell whether they're pointed to without marking the
pointing objects? If you try to mark objects in pure storage, you defeat
the whole point. This change is *exactly* what you need to decide
whether something points to a given symbol.

> check whether the name is pure.

Absolutely not: that's what got us into this mess in the first place.
The purity of a symbol's name should have no bearing on how we treat
that symbol. What matters is whether pure storage refers to an object;
that some of these objects are symbols with pure names is irrelevant.
Please, stop talking about the problem in terms of "symbols whose names
are pure".

Can everyone please stop bikeshedding this? Please read and review the
actual patch instead of suggesting non-solutions. The actual approach is
the simplest general approach that will preserve existing performance
characteristics. The only viable equally simple approach is simply
removing pure storage, and if pure storage works (it amounts to a
primitive kind of generational GC), we might as well keep it.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 18:10                                       ` Daniel Colascione
@ 2014-04-06 19:06                                         ` Eli Zaretskii
  2014-04-07  7:49                                         ` martin rudalics
  1 sibling, 0 replies; 59+ messages in thread
From: Eli Zaretskii @ 2014-04-06 19:06 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: dmantipov, 17168, rms

> Date: Sun, 06 Apr 2014 11:10:38 -0700
> From: Daniel Colascione <dancol@dancol.org>
> Cc: dmantipov@yandex.ru, 17168@debbugs.gnu.org
> 
> Can everyone please stop bikeshedding this? Please read and review the
> actual patch instead of suggesting non-solutions.

Why are you being so harsh?  We are not the enemy.





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 15:06                                   ` Eli Zaretskii
  2014-04-06 15:59                                     ` Daniel Colascione
@ 2014-04-06 19:42                                     ` Stefan Monnier
  1 sibling, 0 replies; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 19:42 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 17168, dmantipov

> As an alternative, would it make sense to try to understand why the
> problems started when they did?  IOW, how come we never saw this
> until now?

I think it's just luck.  I remember we added the current hack when we
made dolist use an uninterned symbol, and in that case it also took us
a while to bump into the problem and then to track it down.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 16:59                                               ` Eli Zaretskii
  2014-04-06 17:11                                                 ` Daniel Colascione
@ 2014-04-06 19:44                                                 ` Stefan Monnier
  1 sibling, 0 replies; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 19:44 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 17168, dmantipov

>> No: you're just suggesting leaving incorrect code in Emacs.
> It's not incorrect, AFAIU.  It might be less optimal.

No, it is incorrect.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 18:01                                   ` Richard Stallman
@ 2014-04-06 19:58                                     ` Stefan Monnier
  2014-04-07 16:56                                       ` Richard Stallman
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 19:58 UTC (permalink / raw)
  To: Richard Stallman; +Cc: 17168, dmantipov

> It might be faster just to traverse a list of these symbols, since
> there are not very many uninterned symbols with names that are pure.

That will fail (i.e. core-dump) if someone later uninterns one of those
symbols pointed to from pure space.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 15:46                                   ` Daniel Colascione
@ 2014-04-06 19:58                                     ` Stefan Monnier
  2014-04-06 20:13                                       ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 19:58 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168

> The pinned bit approach is exactly what I implemented, except that we
> walk obarray, like we already do, instead of all symbols.

We already walk obarray during the mark phase, so I don't understand
what you mean here.

> Your approach would require that we check for non-symbols in purecopy
> and reject them,

Yes.

> and it'd have a bigger performance impact, since we'd
> then need to walk the entire symbol list essentially twice.

Indeed.  I don't expect it to be significant, tho.  As you point out we
already walk that list once during gc_sweep, so doing it one more time
should be very quick.  Also, I'd expect that a significant proportion of
all symbols would be marked with that bit, so scanning all symbols won't
take that much longer than the alternative of only scanning a vector of
pinned symbols.  Also scanning all symbols like gc_sweep means that the
scan is nicely sequential in memory.

> I'd strongly prefer the fully general approach in my patch. It isn't
> *that* complicated.

But it requires more memory, whereas we already have space for an extra
bit in the Lisp_Symbol struct.  I guess the main difference resides in
whether we want to allow uninterning pinned symbols.  If we do as you
suggest and disallow it, then indeed, I expect there to be rather few
uninterned pinned symbols so using a small auxiliary array makes sense.
But I'd rather we don't pay attention to a symbol's interned status, so
we can later unintern them.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 19:58                                     ` Stefan Monnier
@ 2014-04-06 20:13                                       ` Daniel Colascione
  2014-04-06 20:53                                         ` Daniel Colascione
  2014-04-06 21:08                                         ` Stefan Monnier
  0 siblings, 2 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 20:13 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 2919 bytes --]

On 04/06/2014 12:58 PM, Stefan Monnier wrote:
>> The pinned bit approach is exactly what I implemented, except that we
>> walk obarray, like we already do, instead of all symbols.
> 
> We already walk obarray during the mark phase, so I don't understand
> what you mean here.

I meant that, IIUC, you mean to "pin" symbols by adding a pinned bit to
Lisp_Symbol, then at sweep time, enumerating all symbols in all symbol
blocks and marking those with this bit set. My approach is similar,
except that instead of an explicit mark pass over the symbols, my patch
just relies on obarray to keep these symbols alive, then forbids
removing these symbols from obarray. This way, the existing walk over
obarray does the job of the all-symbols walk we'd need otherwise.

>> Your approach would require that we check for non-symbols in purecopy
>> and reject them,
> 
> Yes.

Well, we already do that for markers. Still, I don't like making general
mechanisms less general.

>> and it'd have a bigger performance impact, since we'd
>> then need to walk the entire symbol list essentially twice.
> 
> Indeed.  I don't expect it to be significant, tho.  As you point out we
> already walk that list once during gc_sweep, so doing it one more time
> should be very quick.  

Maybe. We might have tens of thousands of symbols. We don't GC that
often, sure, but the overhead isn't nothing.

On the other hand, if we're walking all symbols during marking, we can
avoid walking the initial obarray, since we already know which symbols
are interned there from the Lisp_Symbol interned field. (We can't use
the same approach for other obarrays because we want them to eventually
get GCed even if they have interned symbols.)

This approach still gives up generality and doesn't do much about the
complexity, but it does save us 350 words of pure storage.

> Also, I'd expect that a significant proportion of
> all symbols would be marked with that bit, so scanning all symbols won't
> take that much longer than the alternative of only scanning a vector of
> pinned symbols.  Also scanning all symbols like gc_sweep means that the
> scan is nicely sequential in memory.
> 
>> I'd strongly prefer the fully general approach in my patch. It isn't
>> *that* complicated.
> 
> But it requires more memory,

~350 machine words, all in pure storage.

> whereas we already have space for an extra
> bit in the Lisp_Symbol struct.  I guess the main difference resides in
> whether we want to allow uninterning pinned symbols.  If we do as you
> suggest and disallow it, then indeed, I expect there to be rather few
> uninterned pinned symbols so using a small auxiliary array makes sense.
> But I'd rather we don't pay attention to a symbol's interned status, so
> we can later unintern them.

Sure. But why would you ever want to unintern a symbol that pure storage
references?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 20:13                                       ` Daniel Colascione
@ 2014-04-06 20:53                                         ` Daniel Colascione
  2014-04-06 21:08                                         ` Stefan Monnier
  1 sibling, 0 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 20:53 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

On 04/06/2014 01:13 PM, Daniel Colascione wrote:
> On 04/06/2014 12:58 PM, Stefan Monnier wrote:
>>> The pinned bit approach is exactly what I implemented, except that we
>>> walk obarray, like we already do, instead of all symbols.
>>
>> We already walk obarray during the mark phase, so I don't understand
>> what you mean here.
> 
> I meant that, IIUC, you mean to "pin" symbols by adding a pinned bit to
> Lisp_Symbol, then at sweep time, enumerating all symbols in all symbol

Err, at mark time, of course.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 20:13                                       ` Daniel Colascione
  2014-04-06 20:53                                         ` Daniel Colascione
@ 2014-04-06 21:08                                         ` Stefan Monnier
  2014-04-06 21:37                                           ` Daniel Colascione
  1 sibling, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-06 21:08 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168

> Sure.  But why would you ever want to unintern a symbol that pure storage
> references?

That's a good question.  But it cuts both ways: if we don't know why
it's done, it's hard to judge if it can be disallowed.
I must say I don't very much like this idea of special-casing the
obarray and the symbols interned therein.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 21:08                                         ` Stefan Monnier
@ 2014-04-06 21:37                                           ` Daniel Colascione
  2014-04-07 16:28                                             ` Stefan Monnier
  0 siblings, 1 reply; 59+ messages in thread
From: Daniel Colascione @ 2014-04-06 21:37 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168

[-- Attachment #1: Type: text/plain, Size: 1273 bytes --]

On 04/06/2014 02:08 PM, Stefan Monnier wrote:
>> Sure.  But why would you ever want to unintern a symbol that pure storage
>> references?
> 
> That's a good question.  But it cuts both ways: if we don't know why
> it's done, it's hard to judge if it can be disallowed.
> I must say I don't very much like this idea of special-casing the
> obarray and the symbols interned therein.

I really can't think of any good reason why anyone would unintern a core
symbol --- all the uses I can think of would be better served by either
using advice or let-binding `obarray'. I sometimes use mass-uninterning
to get rid of stale function names when I'm developing an elisp package,
but this technique isn't useful for symbols referenced from pure
storage, and a good alternative is just resetting value and symbol
slots. (In fact, I shouldn't use unintern at all for this hack.) Since
it's neither safe nor useful to unintern core symbols, I don't think
it's worth worrying about whether it's okay to forbid it.

Besides: we already special-case the initial obarray for keywordp.

Anyway, I'd like to get a fix into emacs-24 soon so we can make sure
we've fixed the GC bug. Are you vetoing the general approach used in
this patch? If so, I can look at alternatives.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 18:10                                       ` Daniel Colascione
  2014-04-06 19:06                                         ` Eli Zaretskii
@ 2014-04-07  7:49                                         ` martin rudalics
  2014-04-07  8:18                                           ` Dmitry Antipov
  1 sibling, 1 reply; 59+ messages in thread
From: martin rudalics @ 2014-04-07  7:49 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: 17168

 > The only viable equally simple approach is simply
 > removing pure storage, and if pure storage works (it amounts to a
 > primitive kind of generational GC), we might as well keep it.

I did not look into the order GC scans its roots so maybe this is a
silly request and you'd better disregard it.  Nevertheless here it is:

(1) Would it be possible to tell how many objects get marked exclusively
     by marking from pure storage?

(2) Would it be possible to tell how many objects get marked exclusively
     by marking from ambiguous roots, that is, due to using conservative
     collection?

Obviously, either (1) or (2) would be incorrect wrt the other, that is,
if an object gets marked from the stack and has been already marked from
pure storage that object would be counted under (1).  Still I think that
figures we'd get here could be useful.

martin





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-07  7:49                                         ` martin rudalics
@ 2014-04-07  8:18                                           ` Dmitry Antipov
  2014-04-07  9:20                                             ` martin rudalics
  0 siblings, 1 reply; 59+ messages in thread
From: Dmitry Antipov @ 2014-04-07  8:18 UTC (permalink / raw)
  To: martin rudalics; +Cc: 17168

On 04/07/2014 11:49 AM, martin rudalics wrote:

> (1) Would it be possible to tell how many objects get marked exclusively
>      by marking from pure storage?

A pure object is never marked itself, so there are no objects that
get marked just because they're referenced from the pure object.

Dmitry






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-07  8:18                                           ` Dmitry Antipov
@ 2014-04-07  9:20                                             ` martin rudalics
  0 siblings, 0 replies; 59+ messages in thread
From: martin rudalics @ 2014-04-07  9:20 UTC (permalink / raw)
  To: Dmitry Antipov; +Cc: 17168

 > Pure object is never marked itself, so there are no objects that
 > gets marked just because they're referenced from the pure object.

I meant the objects reachable from Vpure_reachable.

martin





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 21:37                                           ` Daniel Colascione
@ 2014-04-07 16:28                                             ` Stefan Monnier
  2014-04-07 19:06                                               ` Daniel Colascione
  0 siblings, 1 reply; 59+ messages in thread
From: Stefan Monnier @ 2014-04-07 16:28 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168-done

> Anyway, I'd like to get a fix into emacs-24 soon so we can make sure
> we've fixed the GC bug.

I installed a fix into emacs-24, which lets all symbols be uninterned.

> Are you vetoing the general approach used in this patch?

No: I think disallowing unintern is a good idea, but not for emacs-24.

Indeed, as it turns out, the only non-pure objects referenced from pure
space are symbols and distinguishing uninterned from interned reduces
the number of such "pinned" objects from about 10K to about 250.

Rather than scan all symbols to find the pinned ones, the code
I installed into emacs-24 keeps a pointer to the first symbol_block
that contains a pinned symbol.  This way we only scan about 15K symbols
at the beginning of every GC cycle to mark those 10K pinned symbols.
Compared to keeping a vector of 10K objects, this seems like
a good tradeoff.

For trunk, we could disallow uninterning pinned symbols, at which point
it's worth the trouble to build a vector of those 250 remaining
pinned symbols.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-06 19:58                                     ` Stefan Monnier
@ 2014-04-07 16:56                                       ` Richard Stallman
  0 siblings, 0 replies; 59+ messages in thread
From: Richard Stallman @ 2014-04-07 16:56 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: 17168, dmantipov

[[[ To any NSA and FBI agents reading my email: please consider    ]]]
[[[ whether defending the US Constitution against all enemies,     ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]

    > It might be faster just to traverse a list of these symbols, since
    > there are not very many uninterned symbols with names that are pure.

    That will fail (i.e. core-dump) if someone later uninterns one of those
    symbols pointed to from pure space.

We're talking about a list of the uninterned symbols.  Do you mean, if
someone uninterns an interned symbol with a pure name?  The other part
of the change is to stop that from happening.

-- 
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org  www.gnu.org
Skype: No way! That's nonfree (freedom-denying) software.
  Use Ekiga or an ordinary phone call.






^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-07 16:28                                             ` Stefan Monnier
@ 2014-04-07 19:06                                               ` Daniel Colascione
  2014-04-07 20:42                                                 ` Stefan Monnier
  2014-04-08  7:14                                                 ` martin rudalics
  0 siblings, 2 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-07 19:06 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: Dmitry Antipov, 17168-done

[-- Attachment #1: Type: text/plain, Size: 1359 bytes --]

On 04/07/2014 09:28 AM, Stefan Monnier wrote:
>> Anyway, I'd like to get a fix into emacs-24 soon so we can make sure
>> we've fixed the GC bug.
> 
> I installed a fix into emacs-24, which lets all symbols be uninterned.
> 
>> Are you vetoing the general approach used in this patch?
> 
> No: I think disallowing unintern is a good idea, but not for emacs-24.

Thanks. I'll install my change in trunk. How should we prevent your
change merging into trunk?

> Indeed, as it turns out, the only non-pure objects referenced from pure
> space are symbols and distinguishing uninterned from interned reduces
> the number of such "pinned" objects from about 10K to about 250.
> 
> Rather than scan all symbols to find the pinned ones, the code
> I installed into emacs-24 keeps a pointer to the first symbol_block
> that contains a pinned symbol.  This way we only scan about 15K symbols
> at the beginning of every GC cycle to mark those 10K pinned symbols.
> Compared to keeping a vector of 10K object, this seems like
> a good tradeoff.

It's unfortunate that we still have to mark Vobarray even though we're
separately marking most of the symbols it contains, but I suppose it
doesn't matter all that much: because we've already marked most of the
symbols in the interned symbol chains, we'll short-circuit in
mark_object anyway.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-07 19:06                                               ` Daniel Colascione
@ 2014-04-07 20:42                                                 ` Stefan Monnier
  2014-04-08  7:14                                                 ` martin rudalics
  1 sibling, 0 replies; 59+ messages in thread
From: Stefan Monnier @ 2014-04-07 20:42 UTC (permalink / raw)
  To: Daniel Colascione; +Cc: Dmitry Antipov, 17168-done

> Thanks. I'll install my change in trunk. How should we prevent your
> change merging into trunk?

Don't prevent it.  Merge it (or wait for someone else to do it) first
and then install your change as a patch on top of it.


        Stefan





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-07 19:06                                               ` Daniel Colascione
  2014-04-07 20:42                                                 ` Stefan Monnier
@ 2014-04-08  7:14                                                 ` martin rudalics
  2014-04-08  8:47                                                   ` Daniel Colascione
  1 sibling, 1 reply; 59+ messages in thread
From: martin rudalics @ 2014-04-08  7:14 UTC (permalink / raw)
  To: Daniel Colascione, Stefan Monnier; +Cc: Dmitry Antipov, 17168-done

 > I'll install my change in trunk.

Please wait at least a couple of weeks.  Otherwise, Stefan's change will
hardly receive any testing and we are going to release a version with a
largely untested fix.

martin





^ permalink raw reply	[flat|nested] 59+ messages in thread

* bug#17168: 24.3.50; Segfault at mark_object
  2014-04-08  7:14                                                 ` martin rudalics
@ 2014-04-08  8:47                                                   ` Daniel Colascione
  0 siblings, 0 replies; 59+ messages in thread
From: Daniel Colascione @ 2014-04-08  8:47 UTC (permalink / raw)
  To: martin rudalics, Stefan Monnier; +Cc: Dmitry Antipov, 17168-done

[-- Attachment #1: Type: text/plain, Size: 292 bytes --]

On 04/08/2014 12:14 AM, martin rudalics wrote:
>> I'll install my change in trunk.
> 
> Please wait at least a couple of weeks.  Otherwise, Stefan's change will
> hardly receive any testing and we are going to release a version with a
> largely untested fix.
> 

Good point. Sure.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 901 bytes --]

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2014-04-08  8:47 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-02  7:44 bug#17168: 24.3.50; Segfault at mark_object Nicolas Richard
2014-04-02 15:53 ` Daniel Colascione
2014-04-02 17:59   ` Nicolas Richard
2014-04-02 16:29 ` Dmitry Antipov
2014-04-02 19:46   ` Daniel Colascione
2014-04-02 20:33     ` Daniel Colascione
2014-04-02 20:57       ` Nicolas Richard
2014-04-02 21:50         ` Daniel Colascione
2014-04-02 23:24           ` Stefan Monnier
2014-04-03  0:28             ` Daniel Colascione
2014-04-02 20:37     ` Eli Zaretskii
2014-04-02 20:40       ` Daniel Colascione
2014-04-02 20:55         ` Eli Zaretskii
2014-04-03  6:59         ` Dmitry Antipov
2014-04-03  7:04           ` Dmitry Antipov
2014-04-03  7:55             ` Daniel Colascione
2014-04-03  9:08               ` Daniel Colascione
2014-04-03 14:03                 ` Dmitry Antipov
2014-04-03 15:42                   ` Stefan Monnier
2014-04-03 16:47                     ` Daniel Colascione
2014-04-03 17:49                       ` Dmitry Antipov
2014-04-03 17:51                         ` Daniel Colascione
2014-04-03 19:21                           ` Stefan Monnier
2014-04-03 19:22                             ` Daniel Colascione
2014-04-05 22:37                               ` Daniel Colascione
2014-04-06  5:05                                 ` Dmitry Antipov
2014-04-06  5:11                                   ` Daniel Colascione
2014-04-06 18:00                                     ` Richard Stallman
2014-04-06 18:10                                       ` Daniel Colascione
2014-04-06 19:06                                         ` Eli Zaretskii
2014-04-07  7:49                                         ` martin rudalics
2014-04-07  8:18                                           ` Dmitry Antipov
2014-04-07  9:20                                             ` martin rudalics
2014-04-06 12:36                                 ` Stefan Monnier
2014-04-06 15:06                                   ` Eli Zaretskii
2014-04-06 15:59                                     ` Daniel Colascione
2014-04-06 16:19                                       ` Eli Zaretskii
2014-04-06 16:24                                         ` Daniel Colascione
2014-04-06 16:29                                           ` Eli Zaretskii
2014-04-06 16:37                                             ` Daniel Colascione
2014-04-06 16:59                                               ` Eli Zaretskii
2014-04-06 17:11                                                 ` Daniel Colascione
2014-04-06 19:44                                                 ` Stefan Monnier
2014-04-06 19:42                                     ` Stefan Monnier
2014-04-06 15:46                                   ` Daniel Colascione
2014-04-06 19:58                                     ` Stefan Monnier
2014-04-06 20:13                                       ` Daniel Colascione
2014-04-06 20:53                                         ` Daniel Colascione
2014-04-06 21:08                                         ` Stefan Monnier
2014-04-06 21:37                                           ` Daniel Colascione
2014-04-07 16:28                                             ` Stefan Monnier
2014-04-07 19:06                                               ` Daniel Colascione
2014-04-07 20:42                                                 ` Stefan Monnier
2014-04-08  7:14                                                 ` martin rudalics
2014-04-08  8:47                                                   ` Daniel Colascione
2014-04-06 18:01                                   ` Richard Stallman
2014-04-06 19:58                                     ` Stefan Monnier
2014-04-07 16:56                                       ` Richard Stallman
2014-04-02 20:49     ` Nicolas Richard

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).