* bug#36447: 27.0.50; New "Unknown keyword" errors @ 2019-06-30 18:23 Michael Heerdegen 2019-06-30 18:43 ` Eli Zaretskii 0 siblings, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-06-30 18:23 UTC (permalink / raw) To: 36447 Hi, I've just rebuilt Emacs from master and am getting "Unknown keyword" errors all the time. Magit broken, Gnus broken, I even got an error after typing M-x report-emacs-bug. This wasn't the case with a build from yesterday. Here is a backtrace after trying to reinstall Magit: Debugger entered--Lisp error: (error "Unknown keyword :link") signal(error ("Unknown keyword :link")) error("Unknown keyword %s" :link) custom-handle-keyword(ido :link (emacs-commentary-link :tag "Commentary" "ido.el") custom-group) custom-declare-group(ido nil "Switch between files using substrings." :group extensions :group convenience :version "22.1" :link (emacs-commentary-link :tag "Commentary" "ido.el") :link (emacs-library-link :tag "Lisp File" "ido.el") :link (custom-manual "(ido) Top") :link (info-link "(ido) Customization")) byte-code("\300\301\302\303\304\305\304\306\307\310\311\312\311\313\311\314\311\315&\21\210\316\317\320\321\322DD\323\324\325\326\327\330\301\311\331\332\333\334\335\304\301&\21\210\316\336\320\321..." [custom-declare-group ido nil "Switch between files using substrings." :group extensions convenience :version "22.1" :link (emacs-commentary-link :tag "Commentary" "ido.el") (emacs-library-link :tag "Lisp File" "ido.el") (custom-manual "(ido) Top") (info-link "(ido) Customization") custom-declare-variable ido-mode funcall function #f(compiled-function () #<bytecode 0x15696429abbd>) "Determines for which buffer/file Ido should be ena..." :set #f(compiled-function (symbol value) #<bytecode 0x156963c1e1fd>) :initialize custom-initialize-default :require (emacs-commentary-link "ido.el") :set-after (ido-save-directory-list-file ido-unc-hosts) :type (choice (const :tag "Turn on only buffer" buffer) (const :tag "Turn on only file" file) (const :tag "Turn on both buffer and file" both) (const :tag "Switch off all" nil)) ido-case-fold #f(compiled-function () #<bytecode 0x1569643a7081>) "Non-nil if searching of buffer and file names shou..." boolean ido-ignore-buffers #f(compiled-function () #<bytecode 0x156963be2c5d>) "List of regexps or functions matching buffer names..." (repeat (choice regexp function)) ido-ignore-files #f(compiled-function () #<bytecode 0x156963303d75>) "List of regexps or functions matching file names t..." (repeat (choice regexp function)) ido-ignore-extensions #f(compiled-function () #<bytecode 0x156963303d81>) "Non-nil means ignore files in `completion-ignored-..." ido-show-dot-for-dired #f(compiled-function () #<bytecode 0x1569639bd911>) "Non-nil means to always put . as the first item in..." ido-file-extensions-order #f(compiled-function () #<bytecode 0x1569639bd91d>) ...] 18) require(ido) (progn (require 'ido)) eval((progn (require 'ido)) t) #f(compiled-function (&rest body) "Like `progn', but evaluates the body at compile time if you're compiling.\nThus, the result of the body appears to the compiler as a quoted\nconstant. In interpreted code, this is entirely equivalent to\n`progn', except that the value of the expression may be (but is\nnot necessarily) computed at load time if eager macro expansion\nis enabled." #<bytecode 0x1fee9701cf8f>)((require 'ido)) (eval-when-compile (require 'ido)) eval-buffer(#<buffer *load*> nil "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." nil t) ; Reading at buffer position 1737 load-with-code-conversion("/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." nil t) load("/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." nil t) #f(compiled-function (feature) #<bytecode 0x156962c6204d>)("/home/micha/.emacs.d/elpa/magit-20190630.1329/magi...") mapc(#f(compiled-function (feature) #<bytecode 0x156962c6204d>) ("/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi..." "/home/micha/.emacs.d/elpa/magit-20190630.1329/magi...")) package--load-files-for-activation(#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind nil :archive nil :dir "/home/micha/.emacs.d/elpa/magit-20190630.1329" :extras ((:keywords "git" "tools" "vc")) :signed nil) :reload) package-activate-1(#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind nil :archive nil :dir "/home/micha/.emacs.d/elpa/magit-20190630.1329" :extras ((:keywords "git" "tools" "vc")) :signed nil) :reload :deps) package-unpack(#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil)) #f(compiled-function (&optional good-sigs) #<bytecode 0x156963531405>)(nil) #f(compiled-function () #<bytecode 0x156963439f75>)() package--with-response-buffer-1("http://melpa.org/packages/" #f(compiled-function () #<bytecode 0x15696353a4f9>) :file "magit-20190630.1329.tar.sig" :async nil :error-function #f(compiled-function () #<bytecode 0x156963439f75>) :noerror t) package--check-signature("http://melpa.org/packages/" "magit-20190630.1329.tar" "magit-20190630.1329/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0..." nil #f(compiled-function (&optional good-sigs) #<bytecode 0x156963531405>)) #f(compiled-function () #<bytecode 0x156963be2c45>)() package--with-response-buffer-1("http://melpa.org/packages/" #f(compiled-function () #<bytecode 0x156963be2c45>) :file "magit-20190630.1329.tar" :async nil :error-function #f(compiled-function () #<bytecode 0x156962c4e1f9>) :noerror nil) package-install-from-archive(#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil)) mapc(package-install-from-archive (#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil))) package-download-transaction((#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil))) package-install(#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil) dont-select) package-menu--perform-transaction((#s(package-desc :name magit :version (20190630 1329) :summary "A Git porcelain inside Emacs." :reqs ((emacs (25 1)) (async (20180527)) (dash (20180910)) (git-commit (20181104)) (transient (20190528)) (with-editor (20181103))) :kind tar :archive "melpa" :dir nil :extras ((:commit . "23267cf33a7b690b27dc6760af8ab7f0886239ce") (:keywords "git" "tools" "vc")) :signed nil)) nil) package-menu-execute() Here is one after M-x report-emacs-bug: Debugger entered--Lisp error: (error "Unknown keyword :link") signal(error ("Unknown keyword :link")) error("Unknown keyword %s" :link) custom-handle-keyword(gnus-cite :link (custom-manual "(gnus)Article Highlighting") custom-group) custom-declare-group(gnus-cite nil "Citation." :prefix "gnus-cite-" :link (custom-manual "(gnus)Article Highlighting") :group gnus-article) byte-code("\300\301!\210\300\302!\210\300\303!\210\300\304!\210\305\306\307\310\311\312\313\314\315\316&\11\210\317\320\321\322\315\306\323\324&\7\210\317\325\326\327\315\306\323\324&\7..." [require gnus gnus-range gnus-art message custom-declare-group gnus-cite nil "Citation." :prefix "gnus-cite-" :link (custom-manual "(gnus)Article Highlighting") :group gnus-article custom-declare-variable gnus-cited-opened-text-button-line-format "%(%{[-]%}%)\n" "Format of opened cited text buttons." :type string gnus-cited-closed-text-button-line-format "%(%{[+]%}%)\n" "Format of closed cited text buttons." gnus-cited-lines-visible "The number of lines of hidden cited text to remain..." (choice (const :tag "none" nil) integer (cons :tag "Top and Bottom" integer integer)) gnus-cite-parse-max-size 25000 "Maximum article size (in bytes) where parsing cita..." (choice (const :tag "all" nil) integer) gnus-cite-max-prefix 20 "Maximum possible length for a citation prefix." integer gnus-supercite-regexp (concat "^\\(" message-cite-prefix-regexp "\\)? *" ">>>>> +\"\\([^\"\n]+\\)\" +==") "Regexp matching normal Supercite attribution lines..." regexp gnus-supercite-secondary-regexp "^.*\"\\([^\"\n]+\\)\" +==" "Regexp matching mangled Supercite attribution line..." gnus-cite-minimum-match-count 2 "Minimum number of identical prefixes before we bel..." gnus-cite-attribution-prefix "In article\\|in <\\|On \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sa..." "Regexp matching the beginning of an attribution li..." gnus-cite-attribution-suffix "\\(\\(wrote\\|writes\\|said\\|says\\|>\\)\\(:\\|\\.\\.\\.\\)\\|-..." ...] 10) gnus-message-citation-mode(1) #f(compiled-function () #<bytecode 0x1569635b0a49>)() gnus-msg-mail("bug-gnu-emacs@gnu.org" "27.0.50; Testbug" nil nil nil nil nil nil) compose-mail("bug-gnu-emacs@gnu.org" "27.0.50; Testbug") report-emacs-bug("Testbug") and here one after displaying an article in Gnus (RET in summary): Debugger entered--Lisp error: (error "Unknown keyword :link") signal(error ("Unknown keyword :link")) error("Unknown keyword %s" :link) custom-handle-keyword(gnus-cite :link (custom-manual "(gnus)Article Highlighting") custom-group) custom-declare-group(gnus-cite nil "Citation." :prefix "gnus-cite-" :link (custom-manual "(gnus)Article Highlighting") :group gnus-article) byte-code("\300\301!\210\300\302!\210\300\303!\210\300\304!\210\305\306\307\310\311\312\313\314\315\316&\11\210\317\320\321\322\315\306\323\324&\7\210\317\325\326\327\315\306\323\324&\7..." [require gnus gnus-range gnus-art message custom-declare-group gnus-cite nil "Citation." :prefix "gnus-cite-" :link (custom-manual "(gnus)Article Highlighting") :group gnus-article custom-declare-variable gnus-cited-opened-text-button-line-format "%(%{[-]%}%)\n" "Format of opened cited text buttons." :type string gnus-cited-closed-text-button-line-format "%(%{[+]%}%)\n" "Format of closed cited text buttons." gnus-cited-lines-visible "The number of lines of hidden cited text to remain..." (choice (const :tag "none" nil) integer (cons :tag "Top and Bottom" integer integer)) gnus-cite-parse-max-size 25000 "Maximum article size (in bytes) where parsing cita..." (choice (const :tag "all" nil) integer) gnus-cite-max-prefix 20 "Maximum possible length for a citation prefix." integer gnus-supercite-regexp (concat "^\\(" message-cite-prefix-regexp "\\)? *" ">>>>> +\"\\([^\"\n]+\\)\" +==") "Regexp matching normal Supercite attribution lines..." regexp gnus-supercite-secondary-regexp "^.*\"\\([^\"\n]+\\)\" +==" "Regexp matching mangled Supercite attribution line..." gnus-cite-minimum-match-count 2 "Minimum number of identical prefixes before we bel..." gnus-cite-attribution-prefix "In article\\|in <\\|On \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sa..." "Regexp matching the beginning of an attribution li..." gnus-cite-attribution-suffix "\\(\\(wrote\\|writes\\|said\\|says\\|>\\)\\(:\\|\\.\\.\\.\\)\\|-..." ...] 10) gnus-article-fill-cited-long-lines() gnus-treat-article(nil 1 1 "text/plain") gnus-display-mime() gnus-article-prepare-display() gnus-article-prepare(107571 nil) gnus-summary-display-article(107571 nil) gnus-summary-select-article(nil nil pseudo) gnus-summary-scroll-up(1) funcall-interactively(gnus-summary-scroll-up 1) This is all with my setup. Anyone feeling guilty? TIA, Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-06-30 18:23 bug#36447: 27.0.50; New "Unknown keyword" errors Michael Heerdegen @ 2019-06-30 18:43 ` Eli Zaretskii 2019-06-30 21:44 ` Michael Heerdegen 0 siblings, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-06-30 18:43 UTC (permalink / raw) To: Michael Heerdegen; +Cc: 36447 > From: Michael Heerdegen <michael_heerdegen@web.de> > Date: Sun, 30 Jun 2019 20:23:33 +0200 > > I've just rebuilt Emacs from master and am getting "Unknown keyword" > errors all the time. Magit broken, Gnus broken, I even got an error > after typing M-x report-emacs-bug. This wasn't the case with a build > from yesterday. > > Here is a backtrace after trying to reinstall Magit: > > Debugger entered--Lisp error: (error "Unknown keyword :link") > signal(error ("Unknown keyword :link")) > error("Unknown keyword %s" :link) > custom-handle-keyword(ido :link (emacs-commentary-link :tag "Commentary" "ido.el") custom-group) d8aba87a0d5ef84dfedf4c6a73884fa6dc455b24? ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-06-30 18:43 ` Eli Zaretskii @ 2019-06-30 21:44 ` Michael Heerdegen 2019-07-01 12:25 ` Noam Postavsky 0 siblings, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-06-30 21:44 UTC (permalink / raw) To: Eli Zaretskii; +Cc: 36447 Eli Zaretskii <eliz@gnu.org> writes: > > Here is a backtrace after trying to reinstall Magit: > > > > Debugger entered--Lisp error: (error "Unknown keyword :link") > > signal(error ("Unknown keyword :link")) > > error("Unknown keyword %s" :link) > > custom-handle-keyword(ido :link (emacs-commentary-link :tag > > "Commentary" "ido.el") custom-group) > > d8aba87a0d5ef84dfedf4c6a73884fa6dc455b24? Yes, seems so. Reverting that commit and make bootstrap have fixed it for me. The bad thing is that only newly compiled files are concerned, and AFAIK make bootstrap is necessary to get rid of those. So it seems anyone who uses this commit and has compiled stuff since then has now broken elc's. Also probably for stuff installed with the package manager etc. Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-06-30 21:44 ` Michael Heerdegen @ 2019-07-01 12:25 ` Noam Postavsky 2019-07-01 13:20 ` Pip Cet 2019-07-01 22:04 ` Michael Heerdegen 0 siblings, 2 replies; 53+ messages in thread From: Noam Postavsky @ 2019-07-01 12:25 UTC (permalink / raw) To: Michael Heerdegen; +Cc: 36447 Michael Heerdegen <michael_heerdegen@web.de> writes: > Eli Zaretskii <eliz@gnu.org> writes: > >> > Here is a backtrace after trying to reinstall Magit: >> > >> > Debugger entered--Lisp error: (error "Unknown keyword :link") >> > signal(error ("Unknown keyword :link")) >> > error("Unknown keyword %s" :link) >> > custom-handle-keyword(ido :link (emacs-commentary-link :tag >> > "Commentary" "ido.el") custom-group) >> >> d8aba87a0d5ef84dfedf4c6a73884fa6dc455b24? > > Yes, seems so. Reverting that commit and make bootstrap have fixed it > for me. I tried deleting all .elc files *without* reverting that commit, and (custom-handle-keyword 'ido :link '(emacs-commentary-link :tag "Commentary" "ido.el") 'custom-group) does not give me an error. Is it possible that it was just the 'make bootstrap' which fixed things? ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-01 12:25 ` Noam Postavsky @ 2019-07-01 13:20 ` Pip Cet 2019-07-01 22:04 ` Michael Heerdegen 1 sibling, 0 replies; 53+ messages in thread From: Pip Cet @ 2019-07-01 13:20 UTC (permalink / raw) To: Noam Postavsky; +Cc: Michael Heerdegen, 36447 On Mon, Jul 1, 2019 at 12:26 PM Noam Postavsky <npostavs@gmail.com> wrote: > does not give me an error. Is it possible that it was just the 'make > bootstrap' which fixed things? Just as a data point, I saw similar problems ("unknown keyword :group" errors) that went away after either of `make bootstrap' or re-evaluating custom.el (and installing non-compiled versions of it). So either something changed incompatibly about our bytecode, or some recent versions of emacs must have miscompiled custom.el. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-01 12:25 ` Noam Postavsky 2019-07-01 13:20 ` Pip Cet @ 2019-07-01 22:04 ` Michael Heerdegen 2019-07-02 1:59 ` Stefan Kangas 2019-07-02 13:29 ` Pip Cet 1 sibling, 2 replies; 53+ messages in thread From: Michael Heerdegen @ 2019-07-01 22:04 UTC (permalink / raw) To: Noam Postavsky; +Cc: 36447 Noam Postavsky <npostavs@gmail.com> writes: > Is it possible that it was just the 'make bootstrap' which fixed > things? Hmm - seems so, yes. I just made bootstrap with the commit included, and the problem is still gone. Strange thing, but seems there is nothing to fix. Thanks, Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-01 22:04 ` Michael Heerdegen @ 2019-07-02 1:59 ` Stefan Kangas 2019-07-02 14:17 ` Eli Zaretskii 2019-07-02 13:29 ` Pip Cet 1 sibling, 1 reply; 53+ messages in thread From: Stefan Kangas @ 2019-07-02 1:59 UTC (permalink / raw) To: Michael Heerdegen; +Cc: Noam Postavsky, 36447 Michael Heerdegen <michael_heerdegen@web.de> writes: > > Noam Postavsky <npostavs@gmail.com> writes: > > > Is it possible that it was just the 'make bootstrap' which fixed > > things? > > Hmm - seems so, yes. I just made bootstrap with the commit included, > and the problem is still gone. Strange thing, but seems there is > nothing to fix. I saw this problem too, and can confirm that 'make bootstrap' fixed it for me as well. Thanks, Stefan Kangas ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-02 1:59 ` Stefan Kangas @ 2019-07-02 14:17 ` Eli Zaretskii 0 siblings, 0 replies; 53+ messages in thread From: Eli Zaretskii @ 2019-07-02 14:17 UTC (permalink / raw) To: Stefan Kangas; +Cc: michael_heerdegen, npostavs, 36447 > From: Stefan Kangas <stefan@marxist.se> > Date: Tue, 2 Jul 2019 03:59:11 +0200 > Cc: Noam Postavsky <npostavs@gmail.com>, 36447@debbugs.gnu.org > > I saw this problem too, and can confirm that 'make bootstrap' fixed it > for me as well. Deleting all the *.elc files and saying "make" should have been enough. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-01 22:04 ` Michael Heerdegen 2019-07-02 1:59 ` Stefan Kangas @ 2019-07-02 13:29 ` Pip Cet 2019-07-02 15:35 ` Michael Heerdegen 1 sibling, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-02 13:29 UTC (permalink / raw) To: Michael Heerdegen; +Cc: Noam Postavsky, 36447 On Tue, Jul 2, 2019 at 1:32 AM Michael Heerdegen <michael_heerdegen@web.de> wrote: > Noam Postavsky <npostavs@gmail.com> writes: > > > Is it possible that it was just the 'make bootstrap' which fixed > > things? > > Hmm - seems so, yes. I just made bootstrap with the commit included, > and the problem is still gone. Strange thing, but seems there is > nothing to fix. I'm not sure I agree. Something went wrong somewhere, or we wouldn't have called byte code with what looks like an invalid hash table. Do you still have the emacs binary that failed? ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-02 13:29 ` Pip Cet @ 2019-07-02 15:35 ` Michael Heerdegen 2019-07-02 16:20 ` Noam Postavsky 0 siblings, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-07-02 15:35 UTC (permalink / raw) To: Pip Cet; +Cc: Noam Postavsky, 36447 Pip Cet <pipcet@gmail.com> writes: > > Hmm - seems so, yes. I just made bootstrap with the commit > > included, and the problem is still gone. Strange thing, but seems > > there is nothing to fix. > > I'm not sure I agree. Something went wrong somewhere, or we wouldn't > have called byte code with what looks like an invalid hash table. Did you reply to the wrong thread - or - where is a connection to hash tables? > Do you still have the emacs binary that failed? No. BTW, looking at my backtrace again: Debugger entered--Lisp error: (error "Unknown keyword :link") signal(error ("Unknown keyword :link")) error("Unknown keyword %s" :link) custom-handle-keyword(gnus-cite :link (custom-manual "(gnus)Article Highlighting") custom-group) and at the definition of `custom-handle-keyword', I see that keyword -> :link must have failed this test: (eq keyword :link) as if there where two interned symbols :link. Quite strange. Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-02 15:35 ` Michael Heerdegen @ 2019-07-02 16:20 ` Noam Postavsky 2019-07-02 22:50 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Noam Postavsky @ 2019-07-02 16:20 UTC (permalink / raw) To: Michael Heerdegen; +Cc: 36447, Pip Cet Michael Heerdegen <michael_heerdegen@web.de> writes: > Pip Cet <pipcet@gmail.com> writes: > >> > Hmm - seems so, yes. I just made bootstrap with the commit >> > included, and the problem is still gone. Strange thing, but seems >> > there is nothing to fix. >> >> I'm not sure I agree. Something went wrong somewhere, or we wouldn't >> have called byte code with what looks like an invalid hash table. > > Did you reply to the wrong thread - or - where is a connection to hash > tables? The compiler translates repeated `eq' in a cond like that into a hash and jump. See byte-compile-cond-use-jump-table. >> Do you still have the emacs binary that failed? > > No. 'make bootstrap' deletes all the old binaries. > and at the definition of `custom-handle-keyword', I see that keyword -> > :link must have failed this test: > > (eq keyword :link) > > as if there where two interned symbols :link. Quite strange. Or the jump table compilation was messed up at some point (e.g., Bug#35770, but I think that one was too long ago to explain your more recent problems). There are other patches in Bug#36139 which affect this, but I can't see how any of them could have caused problems which disappear after bootstrap. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-02 16:20 ` Noam Postavsky @ 2019-07-02 22:50 ` Pip Cet 2019-07-03 11:57 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-02 22:50 UTC (permalink / raw) To: Noam Postavsky; +Cc: Michael Heerdegen, 36447 [-- Attachment #1: Type: text/plain, Size: 1756 bytes --] On Tue, Jul 2, 2019 at 4:20 PM Noam Postavsky <npostavs@gmail.com> wrote: > Michael Heerdegen <michael_heerdegen@web.de> writes: > > Pip Cet <pipcet@gmail.com> writes: > >> > Hmm - seems so, yes. I just made bootstrap with the commit > >> > included, and the problem is still gone. Strange thing, but seems > >> > there is nothing to fix. > >> > >> I'm not sure I agree. Something went wrong somewhere, or we wouldn't > >> have called byte code with what looks like an invalid hash table. > > > > Did you reply to the wrong thread - or - where is a connection to hash > > tables? > > The compiler translates repeated `eq' in a cond like that into a hash > and jump. See byte-compile-cond-use-jump-table. I think I found the problem. It's a bit tricky. When we purecopy the hash tables emitted by the byte code compiler, Vpurify_flag is a hash table with predicate 'equal. That means the ->next vectors for different hash tables now might refer to the same pure vector. Rehashing such a hash table thus destroys another hash table's ->next vector, so it shouldn't happen. pdumper.c forces rehashing of many hash tables, including pure ones. The attached patch "fixes" things, at a high price. I'll try coming up with a proper fix soon if no one beats me to it. (To reproduce the problem, I added these lines to fn.c: DEFSYM (QCrehash_size, ":rehash-size"); DEFSYM (QCrehash_threshold, ":rehash-threshold"); DEFSYM (QCweakness, ":weakness"); + DEFSYM (QCgroup, ":group"); + DEFSYM (QCversion, ":version"); + DEFSYM (QCpackage_version, ":package-version"); + DEFSYM (QClink, ":link"); + DEFSYM (QCload, ":load"); + DEFSYM (QCtag, ":tag"); + DEFSYM (QCset_after, ":set-after"); DEFSYM (Qkey, "key"); DEFSYM (Qvalue, "value"); ) [-- Attachment #2: 0001-Fix-bug-36477.patch --] [-- Type: text/x-patch, Size: 781 bytes --] From 20b63dce1695cfc2dfa94961d03da88a4cceaffb Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Tue, 2 Jul 2019 22:48:23 +0000 Subject: [PATCH] Fix bug#36477 --- src/fns.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..375b7d2841 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4223,6 +4223,11 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); + h->next = Fcopy_sequence (h->next); + h->index = Fcopy_sequence (h->index); + h->hash = Fcopy_sequence (h->hash); + h->key_and_value = Fcopy_sequence (h->key_and_value); + /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) -- 2.20.1 ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-02 22:50 ` Pip Cet @ 2019-07-03 11:57 ` Pip Cet 2019-07-05 1:59 ` Michael Heerdegen 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-03 11:57 UTC (permalink / raw) To: Noam Postavsky; +Cc: Michael Heerdegen, 36447 [-- Attachment #1: Type: text/plain, Size: 1810 bytes --] On Tue, Jul 2, 2019 at 10:50 PM Pip Cet <pipcet@gmail.com> wrote: > > The compiler translates repeated `eq' in a cond like that into a hash > > and jump. See byte-compile-cond-use-jump-table. > > I think I found the problem. It's a bit tricky. I'm attaching a patch which modifies standard Emacs in what should be harmless ways, but demonstrates the bug. As far as I can tell, this is a serious issue that will pop up seemingly at random. We really should fix it. However, I thought hash_table_rehash was called more often than it actually is, so the fix should be simple: just copy-sequence a hash table's ->next, ->hash, and ->index vectors before rehashing. (->key_and_value isn't actually modified, so it's safe to share between (read-only) hash tables. What's happening is this: Purecopied hash tables share structure, particularly the ->next vector, with similar but different purecopied hash tables. Some purecopied hash tables dumped with pdumper are rehashed upon first access after loading the dump (in the example, this is the tables which map 'dummy-symbol to t), while others are not (the tables with just fixnums as keys). If a hash table is rehashed, it's ->next vector will change to reflect the changed hash (of 'dummy-symbol); however, the non-rehashed table that shares the ->next vector will be confused: its key_and_value array will stay the same, and valid, but the ->next vector will no longer match it. In practice, this means (gethash valid-key hash-table) will return nil even though the key is valid. While the attached patch appears to work, I would prefer simply dumping hash tables with nil values for ->next, ->hash, and ->index, and rebuilding the entire hash table upon first access. This would also allow us to switch to secure randomized hashes should the need arise. [-- Attachment #2: 0001-Test-bug-36447.patch --] [-- Type: text/x-patch, Size: 2007 bytes --] From 8fe464dbb030a722ca173c442906482ef210f46e Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Wed, 3 Jul 2019 11:27:50 +0000 Subject: [PATCH 1/2] Test bug#36447 --- lisp/custom.el | 10 ++++++++++ src/puresize.h | 2 +- test-custom.el | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test-custom.el diff --git a/lisp/custom.el b/lisp/custom.el index 736460fec7..c49321b829 100644 --- a/lisp/custom.el +++ b/lisp/custom.el @@ -35,6 +35,16 @@ (require 'widget) +(defvar hash-tables (make-vector 256 nil)) + +(dotimes (i 256) + (let ((ht (make-hash-table :purecopy t :size 2 :test 'eq))) + (when (= (% i 3) 0) + (puthash 'dummy-symbol t ht)) + (dotimes (j 16) + (puthash (logand i (lsh 1 j)) t ht)) + (aset hash-tables i (purecopy ht)))) + (defvar custom-define-hook nil ;; Customize information for this option is in `cus-edit.el'. "Hook called after defining each customize option.") diff --git a/src/puresize.h b/src/puresize.h index f5fad8b42b..d29e3e80df 100644 --- a/src/puresize.h +++ b/src/puresize.h @@ -47,7 +47,7 @@ #define SITELOAD_PURESIZE_EXTRA 0 #endif #ifndef BASE_PURESIZE -#define BASE_PURESIZE (2000000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA) +#define BASE_PURESIZE (80000000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA) #endif /* Increase BASE_PURESIZE by a ratio depending on the machine's word size. */ diff --git a/test-custom.el b/test-custom.el new file mode 100644 index 0000000000..b871d30cfa --- /dev/null +++ b/test-custom.el @@ -0,0 +1,12 @@ +(dotimes (i 256) + (let ((ht (aref hash-tables i))) + (dotimes (j 16) + (unless (eq (/= 0 (logand (lsh 1 j) i)) + (gethash (lsh 1 j) ht)) + (error "hash table corruption at table %S, bit %S" i j))))) + +(aref hash-tables 17) +;; #s(hash-table size 3 test eq rehash-size 1.5 rehash-threshold 0.8125 purecopy t data (1 t 0 t 16 t)) + +(gethash 1 (aref hash-tables 17)) +;; nil -- 2.20.1 [-- Attachment #3: 0002-Don-t-alter-shared-structure-in-dumped-purecopied-ha.patch --] [-- Type: text/x-patch, Size: 964 bytes --] From 70419f630f60919c8645a10aeef8d299f5098ff5 Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Wed, 3 Jul 2019 11:48:22 +0000 Subject: [PATCH 2/2] Don't alter shared structure in dumped purecopied hash tables. * src/fns.c (hash_table_rehash): Make sure we're operating on fresh copies of ->next, ->index, ->hash. --- src/fns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..44d2de523a 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4223,6 +4223,12 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); + /* These structures may have been purecopied and shared + (bug#36447). */ + h->next = Fcopy_sequence (h->next); + h->index = Fcopy_sequence (h->index); + h->hash = Fcopy_sequence (h->hash); + /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) -- 2.20.1 ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-03 11:57 ` Pip Cet @ 2019-07-05 1:59 ` Michael Heerdegen 2019-07-05 6:35 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-07-05 1:59 UTC (permalink / raw) To: Pip Cet; +Cc: Noam Postavsky, 36447 Pip Cet <pipcet@gmail.com> writes: > As far as I can tell, this is a serious issue that will pop up > seemingly at random. We really should fix it. Yeah, it just happened again, when rebuilding a newer master version. And this time, cleaning and make bootstrap didn't fix it. I still use the broken build FWIW as it's not as broken as the last time. Anything I should do or try with it? Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 1:59 ` Michael Heerdegen @ 2019-07-05 6:35 ` Pip Cet 2019-07-05 7:50 ` Eli Zaretskii 2019-07-05 7:55 ` Katsumi Yamaoka 0 siblings, 2 replies; 53+ messages in thread From: Pip Cet @ 2019-07-05 6:35 UTC (permalink / raw) To: Michael Heerdegen; +Cc: Noam Postavsky, 36447 On Fri, Jul 5, 2019 at 1:59 AM Michael Heerdegen <michael_heerdegen@web.de> wrote: > > Pip Cet <pipcet@gmail.com> writes: > > > As far as I can tell, this is a serious issue that will pop up > > seemingly at random. We really should fix it. > > Yeah, it just happened again, when rebuilding a newer master version. > And this time, cleaning and make bootstrap didn't fix it. I still use > the broken build FWIW as it's not as broken as the last time. Is it still the same symptom, though? > Anything I should do or try with it? I'd like to verify it's indeed the bug I think it is. I think I could figure that out if I had access to your Emacs binaries (emacs and emacs.pdmp), but you can also debug things locally. Can you do the following? 1. Run in gdb, with ASLR disabled 2. Set a breakpoint in print_vectorlike ("b print_vectorlike") 3. evaluate, in Emacs, (aref (aref (symbol-function #'custom-handle-keyword) 2) 2) 4. wait for the breakpoint to be hit 5. in GDB, disable the breakpoint ("dis") 6. print and prettyprint the hash's next, hash, index, and key_and_value vectors by typing, in gdb p XHASH_TABLE(obj)->next pp XHASH_TABLE(obj)->next p XHASH_TABLE(obj)->hash pp XHASH_TABLE(obj)->hash p XHASH_TABLE(obj)->index pp XHASH_TABLE(obj)->index p XHASH_TABLE(obj)->key_and_value pp XHASH_TABLE(obj)->key_and_value (maybe you need to go up a stack frame or two so obj isn't optimized away). 7. Set a read/write breakpoint on the index table: rwatch -l *(long *)&XHASH_TABLE(obj)->index watch -l *(long *)XHASH_TABLE(obj)->index 8. Rerun emacs with gdb's "r" command, and find out where the watchpoint is hit. In particular, if I'm right, it's accessed from two different hash table objects. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 6:35 ` Pip Cet @ 2019-07-05 7:50 ` Eli Zaretskii 2019-07-05 8:12 ` Pip Cet 2019-07-05 7:55 ` Katsumi Yamaoka 1 sibling, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 7:50 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 06:35:03 +0000 > Cc: Noam Postavsky <npostavs@gmail.com>, 36447@debbugs.gnu.org > > 7. Set a read/write breakpoint on the index table: > > rwatch -l *(long *)&XHASH_TABLE(obj)->index > watch -l *(long *)XHASH_TABLE(obj)->index I think this is the same as awatch -l *(long *)XHASH_TABLE(obj)->index ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 7:50 ` Eli Zaretskii @ 2019-07-05 8:12 ` Pip Cet 2019-07-05 8:25 ` Eli Zaretskii 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-05 8:12 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, 36447 On Fri, Jul 5, 2019 at 7:50 AM Eli Zaretskii <eliz@gnu.org> wrote: > > rwatch -l *(long *)&XHASH_TABLE(obj)->index > > watch -l *(long *)XHASH_TABLE(obj)->index > > I think this is the same as > > awatch -l *(long *)XHASH_TABLE(obj)->index Thanks for checking! What I actually meant was awatch -l *(long *)&XHASH_TABLE(obj)->index With revision 44f199648b0c986a0ac7608f4e9d803c619ae2d6, I can reproduce this problem locally, and I can confirm it's as I thought: y-or-no-p and custom-handle-keyword both generate 7-element hash tables. They share a ->next vector. Both try to rehash the hash table, and since there are non-builtin symbols in there, the new hash collision chains should differ, but can't, since they share a vector. I don't think we can sensibly add tests for this bug, but the fix I posted earlier still seems valid to me. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 8:12 ` Pip Cet @ 2019-07-05 8:25 ` Eli Zaretskii 2019-07-05 8:36 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 8:25 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 08:12:07 +0000 > Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > > On Fri, Jul 5, 2019 at 7:50 AM Eli Zaretskii <eliz@gnu.org> wrote: > > > rwatch -l *(long *)&XHASH_TABLE(obj)->index > > > watch -l *(long *)XHASH_TABLE(obj)->index > > > > I think this is the same as > > > > awatch -l *(long *)XHASH_TABLE(obj)->index > > Thanks for checking! What I actually meant was > awatch -l *(long *)&XHASH_TABLE(obj)->index But then why do you need the rwatch as well? awatch breaks both on read accesses and on write accesses. > With revision 44f199648b0c986a0ac7608f4e9d803c619ae2d6, I can > reproduce this problem locally, and I can confirm it's as I thought: > > y-or-no-p and custom-handle-keyword both generate 7-element hash > tables. They share a ->next vector. Both try to rehash the hash table, > and since there are non-builtin symbols in there, the new hash > collision chains should differ, but can't, since they share a vector. > > I don't think we can sensibly add tests for this bug, but the fix I > posted earlier still seems valid to me. Sorry, I'm not tracking this part of the discussion, as it lost me long ago. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 8:25 ` Eli Zaretskii @ 2019-07-05 8:36 ` Pip Cet 2019-07-05 8:41 ` Eli Zaretskii 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-05 8:36 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, 36447 On Fri, Jul 5, 2019 at 8:25 AM Eli Zaretskii <eliz@gnu.org> wrote: > > From: Pip Cet <pipcet@gmail.com> > > Date: Fri, 5 Jul 2019 08:12:07 +0000 > > Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > > > > On Fri, Jul 5, 2019 at 7:50 AM Eli Zaretskii <eliz@gnu.org> wrote: > > > > rwatch -l *(long *)&XHASH_TABLE(obj)->index > > > > watch -l *(long *)XHASH_TABLE(obj)->index > > > > > > I think this is the same as > > > > > > awatch -l *(long *)XHASH_TABLE(obj)->index > > > > Thanks for checking! What I actually meant was > > awatch -l *(long *)&XHASH_TABLE(obj)->index > > But then why do you need the rwatch as well? awatch breaks both on > read accesses and on write accesses. I don't! Just the awatch is fine, thanks again for pointing it out. It's just that the argument needs a & or we're watching the wrong bit of memory. > > With revision 44f199648b0c986a0ac7608f4e9d803c619ae2d6, I can > > reproduce this problem locally, and I can confirm it's as I thought: > > > > y-or-no-p and custom-handle-keyword both generate 7-element hash > > tables. They share a ->next vector. Both try to rehash the hash table, > > and since there are non-builtin symbols in there, the new hash > > collision chains should differ, but can't, since they share a vector. I've since confirmed that this gdb session does not exhibit the bug: b hash_table_rehash Breakpoint 3 at 0x66bbb6: file fns.c, line 4224. (gdb) command 3 Type commands for breakpoint(s) 3, one per line. End with a line saying just "end". >p h->next = Fcopy_sequence(h->next) >c >end (gdb) r (this is on a CFLAGS="-O0 -g3 -ggdb" build, on GNU/Linux) > > I don't think we can sensibly add tests for this bug, but the fix I > > posted earlier still seems valid to me. > > Sorry, I'm not tracking this part of the discussion, as it lost me > long ago. What's the best way of getting this fixed? Current master is pretty unusable due to this bug, once you call `y-or-n-p' and `custom-handle-keyword', one of the two will stop working (unless you reload their bytecode files first, unsharing their collision chains). ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 8:36 ` Pip Cet @ 2019-07-05 8:41 ` Eli Zaretskii 2019-07-05 9:09 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 8:41 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 08:36:57 +0000 > Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > > > > I don't think we can sensibly add tests for this bug, but the fix I > > > posted earlier still seems valid to me. > > > > Sorry, I'm not tracking this part of the discussion, as it lost me > > long ago. > > What's the best way of getting this fixed? Sorry, I don't think I know what "this bug" is about, and how is the issue with hash tables relevant. I think I understood everything until the point where the discussion quite unexpectedly switched to talking about hash tables, which is where I lost the thread of reasoning. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 8:41 ` Eli Zaretskii @ 2019-07-05 9:09 ` Pip Cet 2019-07-05 12:23 ` Robert Pluim 2019-07-05 12:33 ` Eli Zaretskii 0 siblings, 2 replies; 53+ messages in thread From: Pip Cet @ 2019-07-05 9:09 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, 36447 [-- Attachment #1: Type: text/plain, Size: 3427 bytes --] On Fri, Jul 5, 2019 at 8:41 AM Eli Zaretskii <eliz@gnu.org> wrote: > > > From: Pip Cet <pipcet@gmail.com> > > Date: Fri, 5 Jul 2019 08:36:57 +0000 > > Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > > > > > > I don't think we can sensibly add tests for this bug, but the fix I > > > > posted earlier still seems valid to me. > > > > > > Sorry, I'm not tracking this part of the discussion, as it lost me > > > long ago. > > > > What's the best way of getting this fixed? > > Sorry, I don't think I know what "this bug" is about, The bug: Building emacs with "-O0 -g3 -ggdb" on current Linux will result in binaries that sometimes, depending on the precise compiler version used, will fail weirdly if you evaluate this emacs -Q recipe line by line: (custom-handle-keyword nil :group nil nil) (y-or-n-p "prompt") (custom-handle-keyword nil :group nil nil) The error produced will be "unknown keyword :group", which is nonsensical as :group is indeed a valid keyword. The analysis: It's not the byte code, which is fine and looks like this: byte code for custom-handle-keyword: doc: For customization option SYMBOL, handle KEYWORD with VALUE. ... args: (arg1 arg2 arg3 arg4) 0 varref purify-flag 1 goto-if-nil 1 4 constant purecopy 5 stack-ref 2 6 call 1 7 stack-set 2 9:1 stack-ref 2 10 constant <jump-table-eq (:group 2 :version 3 :package-version 4 :link 5 :load 6 :tag 7 :set-after 8)> 11 switch 12 goto 9 ... 52:9 constant error 53 constant "Unknown keyword %s" 54 stack-ref 4 55 call 2 56 return Note that the code uses a jump table, which is a hash table mapping keys to integers for the "switch" op. This is where hash tables come in. We can inspect the hash table `custom-handle-keyword' uses by evaluating (aref (aref (symbol-function #'custom-handle-keyword) 2) 2) The hash table prints fine. But investigating its C in-memory representation, we find that the hash collision chains, stored in the ->next vector, are corrupted. It turns out that this is because hash_table_rehash was called on a different hash table which had the same ->next vector, but different contents. That's the problem I fixed: for reasons explained below, we sometimes see two hash tables with the same ->next vector, then try to rehash both of them, obtaining different results. Last caller wins, first caller gets the corruption (each hash table is rehashed at most once). The reasons are this: when a hash table is purecopied, its ->next vector is purecopied, which merges it with another, similar, hash table's ->next vector if purify-flag is a (third) hash table. The vectors are compared using `equal', but the pure copies are actually `eq'. This worked fine with the old dumper, because we never modified pure storage. However, with the current pdumper code, we have to do that. The (disappointingly trivial) fix: call copy-sequence on h->next before rehashing the table. This will make h->next impure, which is good since we're going to modify it. While we're there, do the same for the other vectors used in the hash table representation, except for h->key_and_value, which we need not touch. > and how is the issue with hash tables relevant. The bytecode is executing incorrectly because it relies on a purecopied hash table, which is effectively part of the compiled function. The hash table has become corrupted. [-- Attachment #2: 0002-Don-t-alter-shared-structure-in-dumped-purecopied-ha.patch --] [-- Type: text/x-patch, Size: 964 bytes --] From 70419f630f60919c8645a10aeef8d299f5098ff5 Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Wed, 3 Jul 2019 11:48:22 +0000 Subject: [PATCH 2/2] Don't alter shared structure in dumped purecopied hash tables. * src/fns.c (hash_table_rehash): Make sure we're operating on fresh copies of ->next, ->index, ->hash. --- src/fns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..44d2de523a 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4223,6 +4223,12 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); + /* These structures may have been purecopied and shared + (bug#36447). */ + h->next = Fcopy_sequence (h->next); + h->index = Fcopy_sequence (h->index); + h->hash = Fcopy_sequence (h->hash); + /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) -- 2.20.1 ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 9:09 ` Pip Cet @ 2019-07-05 12:23 ` Robert Pluim 2019-07-05 12:33 ` Eli Zaretskii 1 sibling, 0 replies; 53+ messages in thread From: Robert Pluim @ 2019-07-05 12:23 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 >>>>> On Fri, 5 Jul 2019 09:09:13 +0000, Pip Cet <pipcet@gmail.com> said: Pip> This worked fine with the old dumper, because we never modified pure Pip> storage. However, with the current pdumper code, we have to do that. I have to ask: why do we still have pure storage? I seem to remember XEmacs got rid of it at least a decade ago with no ill effects. Robert ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 9:09 ` Pip Cet 2019-07-05 12:23 ` Robert Pluim @ 2019-07-05 12:33 ` Eli Zaretskii 2019-07-05 13:41 ` Pip Cet 2019-07-05 18:00 ` Stefan Monnier 1 sibling, 2 replies; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 12:33 UTC (permalink / raw) To: Pip Cet, Stefan Monnier; +Cc: michael_heerdegen, npostavs, 36447 > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 09:09:13 +0000 > Cc: michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > > > Sorry, I don't think I know what "this bug" is about, > > The bug: Thanks for the explanations. I'm CC'ing Stefan who knows much more about this than I do. > The reasons are this: when a hash table is purecopied, its ->next > vector is purecopied, which merges it with another, similar, hash > table's ->next vector if purify-flag is a (third) hash table. The > vectors are compared using `equal', but the pure copies are actually > `eq'. A naïve question: wouldn't the problem go away if we modified purecopy not to do the above, i.e. not to merge the next vector of a hash table with that of another? > The (disappointingly trivial) fix: > call copy-sequence on h->next before rehashing the table. Rehashing is not only done during dumping, right? So the fix you propose also makes rehashing slightly less efficient. Is that necessary, i.e. are you saying that the bug is in rehashing, not in purecopy? ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 12:33 ` Eli Zaretskii @ 2019-07-05 13:41 ` Pip Cet 2019-07-05 18:00 ` Stefan Monnier 1 sibling, 0 replies; 53+ messages in thread From: Pip Cet @ 2019-07-05 13:41 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, 36447, Stefan Monnier On Fri, Jul 5, 2019 at 12:33 PM Eli Zaretskii <eliz@gnu.org> wrote: > Thanks for the explanations. I'm CC'ing Stefan who knows much more > about this than I do. Thank you. > > The reasons are this: when a hash table is purecopied, its ->next > > vector is purecopied, which merges it with another, similar, hash > > table's ->next vector if purify-flag is a (third) hash table. The > > vectors are compared using `equal', but the pure copies are actually > > `eq'. > > A naïve question: wouldn't the problem go away if we modified purecopy > not to do the above, i.e. not to merge the next vector of a hash table > with that of another? I thought about that, but it's a bit complicated: either we'd keep the ->next vector impure (and make a copy of it), or we would have to add a "don't hash-cons" argument to purecopy(), which kind of defeats the purpose. > > The (disappointingly trivial) fix: > > call copy-sequence on h->next before rehashing the table. > > Rehashing is not only done during dumping, right? As far as I can tell, it's only done when first accessing a dumped hash table, but I might be wrong. (In any case, it seems `hash-table-count' then returns a negative value for the hash table, which seems problematic to me.) > So the fix you > propose also makes rehashing slightly less efficient. Is that > necessary, i.e. are you saying that the bug is in rehashing, not in > purecopy? Again, I may be wrong, since we're using the sign of h->count to indicate whether a hash table needs rehashing and that's hard to grep for, but I think this only affects rehashing after dump, not rehashing when resizing a hash table. It's certainly not called very often, and not for very large hash tables. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 12:33 ` Eli Zaretskii 2019-07-05 13:41 ` Pip Cet @ 2019-07-05 18:00 ` Stefan Monnier 2019-07-05 18:07 ` Eli Zaretskii 2019-07-05 18:57 ` Pip Cet 1 sibling, 2 replies; 53+ messages in thread From: Stefan Monnier @ 2019-07-05 18:00 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, Pip Cet, 36447 > A naïve question: wouldn't the problem go away if we modified purecopy > not to do the above, i.e. not to merge the next vector of a hash table > with that of another? It might do the trick, yes (the patch below does that, for example, tho I think Pip's patch is a better approach). Note that it would mean we still modify data in the purespace, which ideally shouldn't happen. The problem fundamentally is that `purecopy` assumes that the argument passed to it will never again be modified (it doesn't have to be immutable before, but it should be afterwards) but if we rehash afterwards then we break this promise. >> The (disappointingly trivial) fix: >> call copy-sequence on h->next before rehashing the table. > Rehashing is not only done during dumping, right? The problem is not rehashing in general, but rehashing a purecopied hash-table. This should normally never be needed, since a purecopied hash-table should never be modified (no puthash/remhash should be applied to it), but sadly pdumper may need to recompute the hashes because objects's addresses may have changed between the dump and the reload. > So the fix you propose also makes rehashing slightly less efficient. In my patch I added a comment to explain that hash_table_rehash is not used in "normal" rehashing, but only in the rare case of rehashing on the first access to a preloaded hash-table, so the efficiency His patch looks good (probably better than mine). His patch can/should be made slightly more efficient by only doing the Fcopy_sequence on those hash-tables that are in purespace. Stefan diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..cc4fd7f2d7 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4218,6 +4218,11 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) } } +/* Recompute the hashes (and hence also the "next" pointers). + Normally there's never a need to recompute hashes + This is done only on first-access to a hash-table loaded from + the "pdump", because the object's addresses may have changed, thus + affecting their hash. */ void hash_table_rehash (struct Lisp_Hash_Table *h) { diff --git a/src/alloc.c b/src/alloc.c index 64aaa8acdf..b0114a9459 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -5348,9 +5348,24 @@ purecopy_hash_table (struct Lisp_Hash_Table *table) pure->header = table->header; pure->weak = purecopy (Qnil); + /* After reloading the pdumped data, objects may ehave changed location + in memory, so their hash may have changed. So the `hash`, `next`, and + `index` vectors are not immutable and can't safely be hash-cons'ed. */ + if (HASH_TABLE_P (Vpurify_flag)) + { + Fremhash (table->hash, Vpurify_flag); + Fremhash (table->next, Vpurify_flag); + Fremhash (table->index, Vpurify_flag); + } pure->hash = purecopy (table->hash); pure->next = purecopy (table->next); pure->index = purecopy (table->index); + if (HASH_TABLE_P (Vpurify_flag)) + { + Fremhash (table->hash, Vpurify_flag); + Fremhash (table->next, Vpurify_flag); + Fremhash (table->index, Vpurify_flag); + } pure->count = table->count; pure->next_free = table->next_free; pure->pure = table->pure; ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 18:00 ` Stefan Monnier @ 2019-07-05 18:07 ` Eli Zaretskii 2019-07-05 20:16 ` Stefan Monnier 2019-07-05 18:57 ` Pip Cet 1 sibling, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 18:07 UTC (permalink / raw) To: Stefan Monnier; +Cc: michael_heerdegen, npostavs, pipcet, 36447 > From: Stefan Monnier <monnier@iro.umontreal.ca> > Cc: Pip Cet <pipcet@gmail.com>, michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > Date: Fri, 05 Jul 2019 14:00:22 -0400 > > The problem is not rehashing in general, but rehashing a purecopied > hash-table. This should normally never be needed, since a purecopied > hash-table should never be modified (no puthash/remhash should be > applied to it), but sadly pdumper may need to recompute the hashes > because objects's addresses may have changed between the dump and > the reload. Then perhaps pdumper should make a new object when it does that? > His patch can/should be made slightly more efficient by only doing the > Fcopy_sequence on those hash-tables that are in purespace. I cannot say I like tweaking the GP implementation for the benefit of what only the pdumper must do, it doesn't feel right. Problems caused by the pdumper should IMO be fixed in pdumper code. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 18:07 ` Eli Zaretskii @ 2019-07-05 20:16 ` Stefan Monnier 0 siblings, 0 replies; 53+ messages in thread From: Stefan Monnier @ 2019-07-05 20:16 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, pipcet, 36447 >> His patch can/should be made slightly more efficient by only doing the >> Fcopy_sequence on those hash-tables that are in purespace. > > I cannot say I like tweaking the GP implementation for the benefit of > what only the pdumper must do, it doesn't feel right. Problems caused > by the pdumper should IMO be fixed in pdumper code. hash_table_rehash *is* pdumper code. Stefan ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 18:00 ` Stefan Monnier 2019-07-05 18:07 ` Eli Zaretskii @ 2019-07-05 18:57 ` Pip Cet 2019-07-05 19:13 ` Eli Zaretskii 2019-07-05 20:21 ` Stefan Monnier 1 sibling, 2 replies; 53+ messages in thread From: Pip Cet @ 2019-07-05 18:57 UTC (permalink / raw) To: Stefan Monnier; +Cc: michael_heerdegen, npostavs, 36447 On Fri, Jul 5, 2019 at 6:00 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote: > > A naïve question: wouldn't the problem go away if we modified purecopy > > not to do the above, i.e. not to merge the next vector of a hash table > > with that of another? > > It might do the trick, yes (the patch below does that, for example, tho > I think Pip's patch is a better approach). > Note that it would mean we still modify data in the purespace, which > ideally shouldn't happen. > > The problem fundamentally is that `purecopy` assumes that the argument > passed to it will never again be modified (it doesn't have to be > immutable before, but it should be afterwards) but if we rehash > afterwards then we break this promise. Surely you mean you're allowed to modify the argument to purecopy, just not the return value of it? > His patch can/should be made slightly more efficient by only doing the > Fcopy_sequence on those hash-tables that are in purespace. How do we test for that? If I'm reading the pdumper code correctly, it'll put all "unstable" hash tables at the end of the dump, far away from the actually pure objects. I'm not sure how difficult it would be, but we could dump the ->index, ->next, ->hash vectors as Qnil (so not include the actual vectors in the dump), which would make the dump slightly smaller and give us a better test than h->count < 0: INLINE bool hash_rehash_needed_p (const struct Lisp_Hash_Table *h) { return NILP (h->index); } (I used h->index because it's in the same cache line as h->count). But that's hard to do without writing a shrink_hash_table function to be used before dumping. I think hash tables should be shrinkable, but that's beyond the scope of this bug. > diff --git a/src/fns.c b/src/fns.c > index 2fc000a7f4..cc4fd7f2d7 100644 > --- a/src/fns.c > +++ b/src/fns.c > @@ -4218,6 +4218,11 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) > } > } > > +/* Recompute the hashes (and hence also the "next" pointers). And the "index" pointers, if we're listing all of them. > + Normally there's never a need to recompute hashes > + This is done only on first-access to a hash-table loaded from > + the "pdump", because the object's addresses may have changed, thus > + affecting their hash. */ > void > hash_table_rehash (struct Lisp_Hash_Table *h) > { > diff --git a/src/alloc.c b/src/alloc.c > index 64aaa8acdf..b0114a9459 100644 > --- a/src/alloc.c > +++ b/src/alloc.c > @@ -5348,9 +5348,24 @@ purecopy_hash_table (struct Lisp_Hash_Table *table) > > pure->header = table->header; > pure->weak = purecopy (Qnil); > + /* After reloading the pdumped data, objects may ehave changed location > + in memory, so their hash may have changed. So the `hash`, `next`, and > + `index` vectors are not immutable and can't safely be hash-cons'ed. */ > + if (HASH_TABLE_P (Vpurify_flag)) > + { > + Fremhash (table->hash, Vpurify_flag); > + Fremhash (table->next, Vpurify_flag); > + Fremhash (table->index, Vpurify_flag); > + } > pure->hash = purecopy (table->hash); > pure->next = purecopy (table->next); Slightly contrived problem here: if the single key in a single-entry EQ hash is -7, or an expression that happens to have hash value -1, pure->hash and pure->next would be EQ after these two lines, and updating the hash would corrupt the hash table... > pure->index = purecopy (table->index); Same for ->index and ->hash if both are equal to [0]. > + if (HASH_TABLE_P (Vpurify_flag)) > + { > + Fremhash (table->hash, Vpurify_flag); > + Fremhash (table->next, Vpurify_flag); > + Fremhash (table->index, Vpurify_flag); > + } ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 18:57 ` Pip Cet @ 2019-07-05 19:13 ` Eli Zaretskii 2019-07-05 20:21 ` Stefan Monnier 1 sibling, 0 replies; 53+ messages in thread From: Eli Zaretskii @ 2019-07-05 19:13 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447, monnier > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 18:57:56 +0000 > Cc: Eli Zaretskii <eliz@gnu.org>, michael_heerdegen@web.de, npostavs@gmail.com, > 36447@debbugs.gnu.org > > > His patch can/should be made slightly more efficient by only doing the > > Fcopy_sequence on those hash-tables that are in purespace. > > How do we test for that? Can pdumper_object_p help? ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 18:57 ` Pip Cet 2019-07-05 19:13 ` Eli Zaretskii @ 2019-07-05 20:21 ` Stefan Monnier 2019-07-05 21:52 ` Pip Cet 1 sibling, 1 reply; 53+ messages in thread From: Stefan Monnier @ 2019-07-05 20:21 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 >> His patch can/should be made slightly more efficient by only doing the >> Fcopy_sequence on those hash-tables that are in purespace. > > How do we test for that? With PURE_P? > I'm not sure how difficult it would be, but we could dump the ->index, > ->next, ->hash vectors as Qnil (so not include the actual vectors in > the dump), which would make the dump slightly smaller and give us a > better test than h->count < 0: Except that it can't be done at the time of purecopy but only at the time we do the actual dump because the purecopied hashtable may be used in-between (which is also why count is kept positive by purecopy and is only made negative later). > Slightly contrived problem here: if the single key in a single-entry > EQ hash is -7, or an expression that happens to have hash value -1, > pure->hash and pure->next would be EQ after these two lines, and > updating the hash would corrupt the hash table... Indeed. As I said, I think your approach is better, I only included it for reference and for the comments I had added. Stefan ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 20:21 ` Stefan Monnier @ 2019-07-05 21:52 ` Pip Cet 2019-07-05 22:10 ` Stefan Monnier 2019-07-06 15:32 ` Michael Heerdegen 0 siblings, 2 replies; 53+ messages in thread From: Pip Cet @ 2019-07-05 21:52 UTC (permalink / raw) To: Stefan Monnier; +Cc: michael_heerdegen, npostavs, 36447 [-- Attachment #1: Type: text/plain, Size: 1096 bytes --] On Fri, Jul 5, 2019 at 8:21 PM Stefan Monnier <monnier@iro.umontreal.ca> wrote: > >> His patch can/should be made slightly more efficient by only doing the > >> Fcopy_sequence on those hash-tables that are in purespace. > > > > How do we test for that? > > With PURE_P? That only works before we dump, I think? > > I'm not sure how difficult it would be, but we could dump the ->index, > > ->next, ->hash vectors as Qnil (so not include the actual vectors in > > the dump), which would make the dump slightly smaller and give us a > > better test than h->count < 0: > > Except that it can't be done at the time of purecopy but only at the > time we do the actual dump because the purecopied hashtable may be used > in-between (which is also why count is kept positive by purecopy and is > only made negative later). Indeed. I'm attaching a proof of concept that we can simply freeze the hash tables when dumping and thaw them when loading a dump, which includes rehashing. Do you happen to know why it wasn't done that way? I quite enjoyed removing all the scattered hash_rehash_if_needed()s. [-- Attachment #2: 0001-rehash-hash-tables-eagerly-after-loading-the-pdumper.patch --] [-- Type: text/x-patch, Size: 12704 bytes --] From 83f915b32faf32e98ccd806179e379a13b76618d Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Fri, 5 Jul 2019 21:50:44 +0000 Subject: [PATCH] rehash hash tables eagerly after loading the pdumper dump. --- src/bytecode.c | 1 - src/composite.c | 1 - src/emacs.c | 1 + src/fns.c | 27 +++------- src/lisp.h | 15 ------ src/minibuf.c | 3 -- src/pdumper.c | 132 +++++++++++++++++++++++++++++++++--------------- src/pdumper.h | 1 + 8 files changed, 102 insertions(+), 79 deletions(-) diff --git a/src/bytecode.c b/src/bytecode.c index 29dff44f00..9c72429e0c 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) , Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index 183062de46..49a285cff0 100644 --- a/src/composite.c +++ b/src/composite.c @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header) composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); EMACS_UINT hash = h->test.hashfn (&h->test, header); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index 32bb57e272..a5969a96a3 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1577,6 +1577,7 @@ main (int argc, char **argv) if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..8b60311614 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4218,6 +4218,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) } } +extern void +hash_table_rehash (struct Lisp_Hash_Table *h); + void hash_table_rehash (struct Lisp_Hash_Table *h) { @@ -4226,12 +4229,11 @@ hash_table_rehash (struct Lisp_Hash_Table *h) /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - Lisp_Object key = HASH_KEY (h, i); - EMACS_UINT hash_code = h->test.hashfn (&h->test, key); - set_hash_hash_slot (h, i, make_fixnum (hash_code)); - } + { + Lisp_Object key = HASH_KEY (h, i); + EMACS_UINT hash_code = h->test.hashfn (&h->test, key); + set_hash_hash_slot (h, i, make_fixnum (hash_code)); + } /* Reset the index so that any slot we don't fill below is marked invalid. */ @@ -4247,13 +4249,6 @@ hash_table_rehash (struct Lisp_Hash_Table *h) set_hash_index_slot (h, start_of_bucket, i); eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (h->count < 0); - h->count = -h->count; - eassert (!hash_rehash_needed_p (h)); } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4266,8 +4261,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash) EMACS_UINT hash_code; ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - hash_code = h->test.hashfn (&h->test, key); eassert ((hash_code & ~INTMASK) == 0); if (hash) @@ -4296,8 +4289,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - eassert ((hash & ~INTMASK) == 0); /* Increment count after resizing because resizing may fail. */ @@ -4331,8 +4322,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) diff --git a/src/lisp.h b/src/lisp.h index 1a1d8ee7e4..7b77dd97f5 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2370,21 +2370,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) return ASIZE (h->next); } -void hash_table_rehash (struct Lisp_Hash_Table *h); - -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return h->count < 0; -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} - /* Default size for hash tables if not specified. */ enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; diff --git a/src/minibuf.c b/src/minibuf.c index d932dbe8e2..a775d04332 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0, bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index c00f8a0af5..a3480b4b15 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -392,6 +392,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -549,6 +551,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2647,42 +2651,61 @@ dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) /* Return a list of (KEY . VALUE) pairs in the given hash table. */ static Lisp_Object -hash_table_contents (Lisp_Object table) +hash_table_contents (struct Lisp_Hash_Table *h) { Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) if (!NILP (HASH_HASH (h, i))) - dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i))); + { + dump_push (&contents, HASH_KEY (h, i)); + dump_push (&contents, HASH_VALUE (h, i)); + } return Fnreverse (contents); } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ +static dump_off +dump_hash_table_list (struct dump_context *ctx) +{ + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, ctx->hash_tables); + else + return 0; +} + static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - XHASH_TABLE (table_rehashed)->count *= -1; - eassert (XHASH_TABLE (table_rehashed)->count <= 0); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) +hash_table_freeze (struct Lisp_Hash_Table *h) +{ + if (h->count > 0) + h->count = -h->count; + h->key_and_value = hash_table_contents (h); + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + if (nkeys == 0) + nkeys = 1; + h->index = h->next = h->hash = make_fixnum (nkeys); + h->count = nkeys; +} + +/* Defined in fns.c. */ +extern void hash_table_rehash (struct Lisp_Hash_Table *h); + +static void +hash_table_thaw (struct Lisp_Hash_Table *h) +{ + Lisp_Object count = h->index; + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + Lisp_Object key_and_value = h->key_and_value; + h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil); + h->next_free = -1; + ptrdiff_t i = 0; + while (CONSP (key_and_value)) { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); + ASET (h->key_and_value, i++, XCAR (key_and_value)); + key_and_value = XCDR (key_and_value); } - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); + hash_table_rehash (h); } static dump_off @@ -2707,8 +2730,6 @@ dump_hash_table (struct dump_context *ctx, { eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); /* We'll get to the rest later. */ offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; dump_remember_object (ctx, object, offset); @@ -2717,22 +2738,11 @@ dump_hash_table (struct dump_context *ctx, return offset; } - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - hash->count = -hash->count; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4141,6 +4151,20 @@ DEFUN ("dump-emacs-portable", || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_push (&ctx->hash_tables, Qnil); + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5429,6 +5453,13 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + if (header->hash_list) + { + Lisp_Object hash_tables = + ((Lisp_Object *)(dump_base + header->hash_list))[1]; + Vpdumper_hash_tables = hash_tables; + } + /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5498,12 +5529,33 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0, #endif /* HAVE_PDUMPER */ + \f +static void thaw_hash_tables (void) +{ + Lisp_Object hash_tables = Vpdumper_hash_tables; + while (CONSP (hash_tables)) + { + hash_table_thaw (XHASH_TABLE (XCAR (hash_tables))); + hash_tables = XCDR (hash_tables); + } +} + +void +init_pdumper_once (void) +{ + Vpdumper_hash_tables = Qnil; + pdumper_do_now_and_after_load (thaw_hash_tables); +} + void syms_of_pdumper (void) { #ifdef HAVE_PDUMPER + DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables, + doc: /* A list of hash tables that need to be thawed after loading the pdump. */); + Vpdumper_hash_tables = Qnil; defsubr (&Sdump_emacs_portable); defsubr (&Sdump_emacs_portable__sort_predicate); defsubr (&Sdump_emacs_portable__sort_predicate_copied); diff --git a/src/pdumper.h b/src/pdumper.h index ab2f426c1e..cfea06d33d 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -248,6 +248,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END -- 2.20.1 ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 21:52 ` Pip Cet @ 2019-07-05 22:10 ` Stefan Monnier 2019-07-06 6:45 ` Eli Zaretskii 2019-07-06 15:32 ` Michael Heerdegen 1 sibling, 1 reply; 53+ messages in thread From: Stefan Monnier @ 2019-07-05 22:10 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 >> >> His patch can/should be made slightly more efficient by only doing the >> >> Fcopy_sequence on those hash-tables that are in purespace. >> > How do we test for that? >> With PURE_P? > That only works before we dump, I think? If so, it's new (probably introduced by the pdumper). > Indeed. I'm attaching a proof of concept that we can simply freeze the > hash tables when dumping and thaw them when loading a dump, which > includes rehashing. Do you happen to know why it wasn't done that way? I'd guess it was to shorten the startup time by doing this rehashing lazily. Stefan ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 22:10 ` Stefan Monnier @ 2019-07-06 6:45 ` Eli Zaretskii 2019-07-06 15:08 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-06 6:45 UTC (permalink / raw) To: Stefan Monnier; +Cc: michael_heerdegen, npostavs, pipcet, 36447 > From: Stefan Monnier <monnier@iro.umontreal.ca> > Cc: Eli Zaretskii <eliz@gnu.org>, michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > Date: Fri, 05 Jul 2019 18:10:56 -0400 > > > Indeed. I'm attaching a proof of concept that we can simply freeze the > > hash tables when dumping and thaw them when loading a dump, which > > includes rehashing. Do you happen to know why it wasn't done that way? > > I'd guess it was to shorten the startup time by doing this rehashing lazily. The function pdumper-stats with show the time it took to load Emacs, so the effect of this on the load time can be measured. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-06 6:45 ` Eli Zaretskii @ 2019-07-06 15:08 ` Pip Cet 2019-07-09 21:05 ` Stefan Monnier 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-06 15:08 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, 36447, Stefan Monnier [-- Attachment #1: Type: text/plain, Size: 2135 bytes --] On Sat, Jul 6, 2019 at 6:45 AM Eli Zaretskii <eliz@gnu.org> wrote: > > > Indeed. I'm attaching a proof of concept that we can simply freeze the > > > hash tables when dumping and thaw them when loading a dump, which > > > includes rehashing. Do you happen to know why it wasn't done that way? > > > > I'd guess it was to shorten the startup time by doing this rehashing lazily. > > The function pdumper-stats with show the time it took to load Emacs, > so the effect of this on the load time can be measured I'm measuring it directly, and it's more than I expected: about a millisecond, for 4,300 hash table entries. What we can't easily measure is how much the lazy rehashing code would slow us down anyway. For comparison, the entire time stored in pdumper-stats is 15 ms here. I don't think that's significant, because we'd probably end up rehashing most of the large hash tables anyway. We're saving some 250 KB of space in the pdmp image, which was previously used for redundant information. (I'm surprised it's that much, but I guess pdumper relocations are fairly large?) I'm attaching a revised patch, which uses vectors rather than consed lists for both the key_and_value vector, avoiding a copy in the common case where there is more than one hash table entry, and for the list of hash tables. It still contains debugging/timing code. charset.c currently assumes hash table entries will stay at the same index in Vcharset_hash_table. I think that works okay in practice, because we don't shrink or reorder hash tables, but it was still a bit of a nasty surprise. This concept appears to work: modify pdumper to special-case hash tables and freeze/thaw them properly. You probably shouldn't dump hash tables with complicated user-defined hash functions. Both PURE_P and pdumper_object_p fail to distinguish between tables that were pure or impure before being dumped. This also fixes the bug that (hash-table-count dumped-hash-table) will return a negative number if no previous access to the hash table has happened, but of course we can fix that directly... Of course, we're still modifying purecopied information. [-- Attachment #2: 0001-Rehash-eagerly.patch --] [-- Type: text/x-patch, Size: 12848 bytes --] diff --git a/src/bytecode.c b/src/bytecode.c index 29dff44f00..9c72429e0c 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1402,7 +1402,6 @@ #define DEFINE(name, value) LABEL (name) , Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index 183062de46..49a285cff0 100644 --- a/src/composite.c +++ b/src/composite.c @@ -654,7 +654,6 @@ gstring_lookup_cache (Lisp_Object header) composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); EMACS_UINT hash = h->test.hashfn (&h->test, header); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index 32bb57e272..a5969a96a3 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1577,6 +1577,7 @@ main (int argc, char **argv) if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..8b60311614 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4218,6 +4218,9 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) } } +extern void +hash_table_rehash (struct Lisp_Hash_Table *h); + void hash_table_rehash (struct Lisp_Hash_Table *h) { @@ -4226,12 +4229,11 @@ hash_table_rehash (struct Lisp_Hash_Table *h) /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (HASH_HASH (h, i))) - { - Lisp_Object key = HASH_KEY (h, i); - EMACS_UINT hash_code = h->test.hashfn (&h->test, key); - set_hash_hash_slot (h, i, make_fixnum (hash_code)); - } + { + Lisp_Object key = HASH_KEY (h, i); + EMACS_UINT hash_code = h->test.hashfn (&h->test, key); + set_hash_hash_slot (h, i, make_fixnum (hash_code)); + } /* Reset the index so that any slot we don't fill below is marked invalid. */ @@ -4247,13 +4249,6 @@ hash_table_rehash (struct Lisp_Hash_Table *h) set_hash_index_slot (h, start_of_bucket, i); eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (h->count < 0); - h->count = -h->count; - eassert (!hash_rehash_needed_p (h)); } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4266,8 +4261,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, EMACS_UINT *hash) EMACS_UINT hash_code; ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - hash_code = h->test.hashfn (&h->test, key); eassert ((hash_code & ~INTMASK) == 0); if (hash) @@ -4296,8 +4289,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - eassert ((hash & ~INTMASK) == 0); /* Increment count after resizing because resizing may fail. */ @@ -4331,8 +4322,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) diff --git a/src/lisp.h b/src/lisp.h index 1a1d8ee7e4..7b77dd97f5 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2370,21 +2370,6 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) return ASIZE (h->next); } -void hash_table_rehash (struct Lisp_Hash_Table *h); - -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return h->count < 0; -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} - /* Default size for hash tables if not specified. */ enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; diff --git a/src/minibuf.c b/src/minibuf.c index d932dbe8e2..a775d04332 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1203,9 +1203,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0, bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index c00f8a0af5..5f0635aa4d 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -392,6 +392,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -549,6 +551,8 @@ dump_fingerprint (const char *label, unsigned char const *xfingerprint) heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2647,42 +2651,65 @@ dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) /* Return a list of (KEY . VALUE) pairs in the given hash table. */ static Lisp_Object -hash_table_contents (Lisp_Object table) +hash_table_contents (struct Lisp_Hash_Table *h) { Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) + /* Make sure key_and_value ends up in the same order, charset.c + relies on it by expecting hash table indices to stay constant + across the dump. */ + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i) if (!NILP (HASH_HASH (h, i))) - dump_push (&contents, Fcons (HASH_KEY (h, i), HASH_VALUE (h, i))); - return Fnreverse (contents); + { + dump_push (&contents, HASH_VALUE (h, i)); + dump_push (&contents, HASH_KEY (h, i)); + } + return CALLN (Fapply, Qvector, contents); +} + +static dump_off +dump_hash_table_list (struct dump_context *ctx) +{ + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables)); + else + return 0; +} + +static void +hash_table_freeze (struct Lisp_Hash_Table *h) +{ + h->key_and_value = hash_table_contents (h); + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + h->count = nkeys; + if (nkeys == 0) + nkeys = 1; + h->index = h->next = h->hash = make_fixnum (nkeys); } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ +/* Defined in fns.c. */ +extern void hash_table_rehash (struct Lisp_Hash_Table *h); + static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - XHASH_TABLE (table_rehashed)->count *= -1; - eassert (XHASH_TABLE (table_rehashed)->count <= 0); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (XHASH_TABLE (table_rehashed)->count >= 0); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) +hash_table_thaw (struct Lisp_Hash_Table *h) +{ + Lisp_Object count = h->index; + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + Lisp_Object key_and_value = h->key_and_value; + h->next_free = -1; + if (XFIXNAT (count) <= 1) { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); + h->key_and_value = Fmake_vector (make_fixnum (2 * XFIXNAT (count)), Qnil); + ptrdiff_t i = 0; + while (i < ASIZE (key_and_value)) + { + ASET (h->key_and_value, i, AREF (key_and_value, i)); + i++; + } } - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); + hash_table_rehash (h); } static dump_off @@ -2707,8 +2734,6 @@ dump_hash_table (struct dump_context *ctx, { eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); /* We'll get to the rest later. */ offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; dump_remember_object (ctx, object, offset); @@ -2717,22 +2742,11 @@ dump_hash_table (struct dump_context *ctx, return offset; } - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - hash->count = -hash->count; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4141,6 +4155,19 @@ DEFUN ("dump-emacs-portable", || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5429,6 +5456,13 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + if (header->hash_list) + { + struct Lisp_Vector *hash_tables = + ((struct Lisp_Vector *)(dump_base + header->hash_list)); + XSETVECTOR (Vpdumper_hash_tables, hash_tables); + } + /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5500,10 +5534,41 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0, \f +static void thaw_hash_tables (void) +{ + const struct timespec start_time = current_timespec (); + Lisp_Object hash_tables = Vpdumper_hash_tables; + ptrdiff_t total_size = 0; + ptrdiff_t total_count = 0; + ptrdiff_t i = 0; + if (VECTORP (hash_tables)) + while (i < ASIZE (hash_tables)) + { + hash_table_thaw (XHASH_TABLE (AREF (hash_tables, i))); + total_size += XFIXNAT (Flength (XHASH_TABLE (AREF (hash_tables, i))->key_and_value)); + total_count ++; + i++; + } + + const struct timespec end_time = current_timespec (); + fprintf (stderr, "%ld hash tables, %ld key/values %f s\n", + total_count, total_size, timespectod(timespec_sub (end_time, start_time))); +} + +void +init_pdumper_once (void) +{ + Vpdumper_hash_tables = Qnil; + pdumper_do_now_and_after_load (thaw_hash_tables); +} + void syms_of_pdumper (void) { #ifdef HAVE_PDUMPER + DEFVAR_LISP ("pdumper-hash-tables", Vpdumper_hash_tables, + doc: /* A list of hash tables that need to be thawed after loading the pdump. */); + Vpdumper_hash_tables = Fmake_vector (make_fixnum (0), Qnil); defsubr (&Sdump_emacs_portable); defsubr (&Sdump_emacs_portable__sort_predicate); defsubr (&Sdump_emacs_portable__sort_predicate_copied); diff --git a/src/pdumper.h b/src/pdumper.h index ab2f426c1e..cfea06d33d 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -248,6 +248,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-06 15:08 ` Pip Cet @ 2019-07-09 21:05 ` Stefan Monnier 2019-07-10 2:38 ` Eli Zaretskii 0 siblings, 1 reply; 53+ messages in thread From: Stefan Monnier @ 2019-07-09 21:05 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, 36447 I think we should get Daniel's opinion on this. Stefan > On Sat, Jul 6, 2019 at 6:45 AM Eli Zaretskii <eliz@gnu.org> wrote: >> > > Indeed. I'm attaching a proof of concept that we can simply freeze the >> > > hash tables when dumping and thaw them when loading a dump, which >> > > includes rehashing. Do you happen to know why it wasn't done that way? >> > >> > I'd guess it was to shorten the startup time by doing this rehashing lazily. >> >> The function pdumper-stats with show the time it took to load Emacs, >> so the effect of this on the load time can be measured > > I'm measuring it directly, and it's more than I expected: about a > millisecond, for 4,300 hash table entries. What we can't easily > measure is how much the lazy rehashing code would slow us down anyway. > > For comparison, the entire time stored in pdumper-stats is 15 ms here. > > I don't think that's significant, because we'd probably end up > rehashing most of the large hash tables anyway. We're saving some 250 > KB of space in the pdmp image, which was previously used for redundant > information. (I'm surprised it's that much, but I guess pdumper > relocations are fairly large?) > > I'm attaching a revised patch, which uses vectors rather than consed > lists for both the key_and_value vector, avoiding a copy in the common > case where there is more than one hash table entry, and for the list > of hash tables. It still contains debugging/timing code. > > charset.c currently assumes hash table entries will stay at the same > index in Vcharset_hash_table. I think that works okay in practice, > because we don't shrink or reorder hash tables, but it was still a bit > of a nasty surprise. > > This concept appears to work: modify pdumper to special-case hash > tables and freeze/thaw them properly. You probably shouldn't dump hash > tables with complicated user-defined hash functions. > > Both PURE_P and pdumper_object_p fail to distinguish between tables > that were pure or impure before being dumped. > > This also fixes the bug that (hash-table-count dumped-hash-table) will > return a negative number if no previous access to the hash table has > happened, but of course we can fix that directly... > > Of course, we're still modifying purecopied information. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-09 21:05 ` Stefan Monnier @ 2019-07-10 2:38 ` Eli Zaretskii 2019-07-10 3:19 ` Daniel Colascione 0 siblings, 1 reply; 53+ messages in thread From: Eli Zaretskii @ 2019-07-10 2:38 UTC (permalink / raw) To: Stefan Monnier, Daniel Colascione Cc: michael_heerdegen, npostavs, pipcet, 36447 > From: Stefan Monnier <monnier@iro.umontreal.ca> > Cc: Eli Zaretskii <eliz@gnu.org>, michael_heerdegen@web.de, npostavs@gmail.com, 36447@debbugs.gnu.org > Date: Tue, 09 Jul 2019 17:05:53 -0400 > > I think we should get Daniel's opinion on this. Daniel, could you please comment on the issues discussed in this bug? > > On Sat, Jul 6, 2019 at 6:45 AM Eli Zaretskii <eliz@gnu.org> wrote: > >> > > Indeed. I'm attaching a proof of concept that we can simply freeze the > >> > > hash tables when dumping and thaw them when loading a dump, which > >> > > includes rehashing. Do you happen to know why it wasn't done that way? > >> > > >> > I'd guess it was to shorten the startup time by doing this rehashing lazily. > >> > >> The function pdumper-stats with show the time it took to load Emacs, > >> so the effect of this on the load time can be measured > > > > I'm measuring it directly, and it's more than I expected: about a > > millisecond, for 4,300 hash table entries. What we can't easily > > measure is how much the lazy rehashing code would slow us down anyway. > > > > For comparison, the entire time stored in pdumper-stats is 15 ms here. > > > > I don't think that's significant, because we'd probably end up > > rehashing most of the large hash tables anyway. We're saving some 250 > > KB of space in the pdmp image, which was previously used for redundant > > information. (I'm surprised it's that much, but I guess pdumper > > relocations are fairly large?) > > > > I'm attaching a revised patch, which uses vectors rather than consed > > lists for both the key_and_value vector, avoiding a copy in the common > > case where there is more than one hash table entry, and for the list > > of hash tables. It still contains debugging/timing code. > > > > charset.c currently assumes hash table entries will stay at the same > > index in Vcharset_hash_table. I think that works okay in practice, > > because we don't shrink or reorder hash tables, but it was still a bit > > of a nasty surprise. > > > > This concept appears to work: modify pdumper to special-case hash > > tables and freeze/thaw them properly. You probably shouldn't dump hash > > tables with complicated user-defined hash functions. > > > > Both PURE_P and pdumper_object_p fail to distinguish between tables > > that were pure or impure before being dumped. > > > > This also fixes the bug that (hash-table-count dumped-hash-table) will > > return a negative number if no previous access to the hash table has > > happened, but of course we can fix that directly... > > > > Of course, we're still modifying purecopied information. > > ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-10 2:38 ` Eli Zaretskii @ 2019-07-10 3:19 ` Daniel Colascione 2019-07-10 15:01 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Daniel Colascione @ 2019-07-10 3:19 UTC (permalink / raw) To: Eli Zaretskii; +Cc: michael_heerdegen, npostavs, Stefan Monnier, pipcet, 36447 >> From: Stefan Monnier <monnier@iro.umontreal.ca> >> Cc: Eli Zaretskii <eliz@gnu.org>, michael_heerdegen@web.de, >> npostavs@gmail.com, 36447@debbugs.gnu.org >> Date: Tue, 09 Jul 2019 17:05:53 -0400 >> >> I think we should get Daniel's opinion on this. > > Daniel, could you please comment on the issues discussed in this bug? Thanks for debugging this problem. It really is nasty. AIUI, the problem is that hash-consing the hash vectors might unify vectors that happen to have the same contents under one hashing scheme but different contents under a different hashing scheme, so when we rehash lazily, we correctly rehash one hash table and corrupt a different hash table's index array by side effect. There are two proposed solutions: 1) Copy the hash table internal vectors on rehash, and 2) "Freeze" hash tables by eliminating the index arrays and rebuilding them all eagerly on dump start. #1 works, but it's somewhat inefficient. #2 is a variant of #1, in a sense. Instead of copying the hash table vectors and mutating them, we rebuild them from scratch. I don't understand why we have to do that eagerly. #1 isn't as bad as you might think. The existing hash table rehashing stuff is inefficient anyway. Suppose we dump a hash table, load a dump, and access the hash table. Right now, we do the rehashing and take COW faults for the arrays we mutate. So far, so good. What happens if we grow the hash table past its load factor limit? We allocate new vectors and rehash into those, forgetting the old vectors. In the non-pdumper case, GC will collect those older vectors eventually. In the pdumper case, those COWed pages will stick around in memory forever, unused. I don't think it counts as a "leak", since the memory waste is bounded, but it's still memory waste. If we use approach #1, we don't mutate the hash table pages mapped from the dump. Besides, the amount of work we do is actually the same. In the COW-page case, the kernel allocates a new page, reads the dump bytes, and writes them to the new page. In the Fcopy_sequence case, *we* allocate a new vector, read the dump bytes, and write them into anonymous memory. It's the same work either way, except that if we copy, when we grow the hash table, we can actually free the original vectors. IMHO, the right approach is to check in #1 for now and switch to a #2-like approach once master is stable. Noticing that we don't actually have to store the hash table internal arrays in the dump is a good catch. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-10 3:19 ` Daniel Colascione @ 2019-07-10 15:01 ` Pip Cet 2019-07-10 17:16 ` Daniel Colascione 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-10 15:01 UTC (permalink / raw) To: Daniel Colascione; +Cc: michael_heerdegen, npostavs, 36447, Stefan Monnier On Wed, Jul 10, 2019 at 3:19 AM Daniel Colascione <dancol@dancol.org> wrote: > Thanks for debugging this problem. It really is nasty. AIUI, the problem > is that hash-consing the hash vectors might unify vectors that happen to > have the same contents under one hashing scheme but different contents > under a different hashing scheme, so when we rehash lazily, we correctly > rehash one hash table and corrupt a different hash table's index array by > side effect. There are two proposed solutions: > > 1) Copy the hash table internal vectors on rehash, and > 2) "Freeze" hash tables by eliminating the index arrays and rebuilding > them all eagerly on dump start. That summary is correct, I think. > #1 works, but it's somewhat inefficient. > > #2 is a variant of #1, in a sense. Instead of copying the hash table > vectors and mutating them, we rebuild them from scratch. I don't > understand why we have to do that eagerly. I'm sorry if I suggested that we must do that. On the contrary, both options are entirely viable, though on balance I prefer the eager version. The lazy approach makes the code more complicated, slightly slower, and introduced what appears to me to be at least one bug (Fhash_table_count returns a negative integer if called with a non-rehashed hastable). The eager approach slows down startup by a fraction of a millisecond (it might be an entire millisecond if your Emacs session doesn't access any of the dumped hash tables at all, but since it does tend to do that, it'll be less in practice). > #1 isn't as bad as you might think. But it's not as good as "do #1 but only if PURE_P", which no longer works. > It's the same work either way, except that if we copy, when we grow the > hash table, we can actually free the original vectors. I'd like to restrict #1 to hash tables which are somehow immutable, either because they're pure or because we actually introduce immutable objects, so they'd never grow. Mutable hash tables must not share their index vectors anyway. > IMHO, the right approach is to check in #1 for now and switch to a #2-like > approach once master is stable. Noticing that we don't actually have to > store the hash table internal arrays in the dump is a good catch. I agree, but I think we should discuss the future of pure space, too. Maybe we should have a separate bug for that, though. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-10 15:01 ` Pip Cet @ 2019-07-10 17:16 ` Daniel Colascione 2019-07-10 20:14 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Daniel Colascione @ 2019-07-10 17:16 UTC (permalink / raw) To: Pip Cet; +Cc: michael_heerdegen, npostavs, Stefan Monnier, 36447 > On Wed, Jul 10, 2019 at 3:19 AM Daniel Colascione <dancol@dancol.org> > wrote: >> #1 works, but it's somewhat inefficient. >> >> #2 is a variant of #1, in a sense. Instead of copying the hash table >> vectors and mutating them, we rebuild them from scratch. I don't >> understand why we have to do that eagerly. > > I'm sorry if I suggested that we must do that. On the contrary, both > options are entirely viable, though on balance I prefer the eager > version. > > The lazy approach makes the code more complicated, I don't think doing it eagerly is a complexity win. The eager patch posted upthread is a wash. > slightly slower, No it doesn't. The hash table branch on negative count should be predicted correctly and involves a test against a cache line we're loading anyway. We can add explicit likely() and unlikely() annotations too. > and introduced what appears to me to be at least one bug > (Fhash_table_count returns a negative integer if called with a > non-rehashed hastable). That's a trivial oversight and not an inherent difficulty in the lazy approach. > The eager approach slows down startup by a fraction of a millisecond > (it might be an entire millisecond if your Emacs session doesn't > access any of the dumped hash tables at all, but since it does tend to > do that, it'll be less in practice). A millisecond here and a millisecond there and things end up costing real time. A lazy approach isn't any harder than an eager one and >> #1 isn't as bad as you might think. > > But it's not as good as "do #1 but only if PURE_P", which no longer works. > >> It's the same work either way, except that if we copy, when we grow the >> hash table, we can actually free the original vectors. > > I'd like to restrict #1 to hash tables which are somehow immutable, > either because they're pure or because we actually introduce immutable > objects, so they'd never grow. Mutable hash tables must not share > their index vectors anyway. Did you miss the part about avoiding copy-on-write faults? Those hash table vectors are going to be copied anyway, and we might as well do it ourselves instead of making the kernel do it. (unexec has the same inefficiency, BTW.) We should just do #1 for all hash tables. >> IMHO, the right approach is to check in #1 for now and switch to a >> #2-like >> approach once master is stable. Noticing that we don't actually have to >> store the hash table internal arrays in the dump is a good catch. > > I agree, but I think we should discuss the future of pure space, too. > Maybe we should have a separate bug for that, though. Sure. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-10 17:16 ` Daniel Colascione @ 2019-07-10 20:14 ` Pip Cet 0 siblings, 0 replies; 53+ messages in thread From: Pip Cet @ 2019-07-10 20:14 UTC (permalink / raw) To: Daniel Colascione; +Cc: michael_heerdegen, npostavs, 36447, Stefan Monnier On Wed, Jul 10, 2019 at 5:16 PM Daniel Colascione <dancol@dancol.org> wrote: > We should just do #1 for all hash tables. I think that's what we're currently doing. We should close this bug and open new ones for tangential issues (pure space, Fhash_table_count, and whether lazy rehashing is a good idea), if and when someone feels like discussing those issues. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 21:52 ` Pip Cet 2019-07-05 22:10 ` Stefan Monnier @ 2019-07-06 15:32 ` Michael Heerdegen 2019-07-08 17:30 ` Lars Ingebrigtsen 1 sibling, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-07-06 15:32 UTC (permalink / raw) To: Pip Cet; +Cc: npostavs, 36447, Stefan Monnier Pip Cet <pipcet@gmail.com> writes: > From 83f915b32faf32e98ccd806179e379a13b76618d Mon Sep 17 00:00:00 2001 > From: Pip Cet <pipcet@gmail.com> > Date: Fri, 5 Jul 2019 21:50:44 +0000 > Subject: [PATCH] rehash hash tables eagerly after loading the pdumper dump. Pip, since you can reproduce yourself now, I'll just try this patch in the meantime. Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-06 15:32 ` Michael Heerdegen @ 2019-07-08 17:30 ` Lars Ingebrigtsen 2019-07-08 17:58 ` Pip Cet 0 siblings, 1 reply; 53+ messages in thread From: Lars Ingebrigtsen @ 2019-07-08 17:30 UTC (permalink / raw) To: Michael Heerdegen; +Cc: 36447, Stefan Monnier, Pip Cet, npostavs I'm still seeing this... but not after every recompilation. If I say "make bootstrap", then Emacs usually works. If I then recompile parts, I sometimes get a non-working Emacs. For instance, at the moment if I try to eval the following: (defgroup compilation nil "Run compiler as inferior of Emacs, parse error messages." :group 'tools :group 'processes) I get: Debugger entered--Lisp error: (error "Unknown keyword :group") signal(error ("Unknown keyword :group")) error("Unknown keyword %s" :group) custom-handle-keyword(compilation :group tools custom-group) custom-declare-group(compilation nil "Run compiler as inferior of Emacs, parse error mes..." :group tools :group processes) eval-region(1213 1336 t #f(compiled-function (ignore) #<bytecode 0x156a64060ab9>)) ; Reading at buffer position 1247 elisp--eval-defun() eval-defun(nil) funcall-interactively(eval-defun nil) call-interactively(eval-defun nil nil) command-execute(eval-defun) Perhaps the original change should be reverted until you've figured out how to fix this completely? It's very annoying. -- (domestic pets only, the antidote for overdose, milk.) bloggy blog: http://lars.ingebrigtsen.no ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 17:30 ` Lars Ingebrigtsen @ 2019-07-08 17:58 ` Pip Cet 2019-07-08 22:18 ` Lars Ingebrigtsen ` (2 more replies) 0 siblings, 3 replies; 53+ messages in thread From: Pip Cet @ 2019-07-08 17:58 UTC (permalink / raw) To: Lars Ingebrigtsen; +Cc: Michael Heerdegen, 36447, npostavs, Stefan Monnier [-- Attachment #1: Type: text/plain, Size: 1525 bytes --] On Mon, Jul 8, 2019 at 5:30 PM Lars Ingebrigtsen <larsi@gnus.org> wrote: > I'm still seeing this... but not after every recompilation. If I say > "make bootstrap", then Emacs usually works. If I then recompile parts, > I sometimes get a non-working Emacs. Are you on master? As far as I know, no fix has been applied there yet. Also, if I understand correctly, the bug depends on ASLR both at the time of dump and in the final Emacs image. Also also, can you please stash copies of the emacs binaries and emacs.pdmp that fail to work? I did something very silly and overwrote my compiler with a very slightly different version (after carefully backing up /usr/local/bin/gcc, too. Too bad that's just the driver program), and now can no longer reproduce the issue easily. > Perhaps the original change should be reverted until you've figured out > how to fix this completely? It's very annoying. It is. I think it's worth the risk to push my original fix (reattached to this message) now, but I don't have commit privileges so it's not my decision to make. Alternatively, we could simply disable purify-flag hash consing for now and everything should work, I think. But I don't think there's a clear candidate for "the original change". The bug's been with us for a while, possibly since pdumper was introduced, but it got significantly worse when the recent byte compilation changes were committed; however, those changes are actually okay. Is there anything I can do to help get this bug fixed on master? Thanks! [-- Attachment #2: 0002-Don-t-alter-shared-structure-in-dumped-purecopied-ha.patch --] [-- Type: text/x-patch, Size: 964 bytes --] From 70419f630f60919c8645a10aeef8d299f5098ff5 Mon Sep 17 00:00:00 2001 From: Pip Cet <pipcet@gmail.com> Date: Wed, 3 Jul 2019 11:48:22 +0000 Subject: [PATCH 2/2] Don't alter shared structure in dumped purecopied hash tables. * src/fns.c (hash_table_rehash): Make sure we're operating on fresh copies of ->next, ->index, ->hash. --- src/fns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/fns.c b/src/fns.c index 2fc000a7f4..44d2de523a 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4223,6 +4223,12 @@ hash_table_rehash (struct Lisp_Hash_Table *h) { ptrdiff_t size = HASH_TABLE_SIZE (h); + /* These structures may have been purecopied and shared + (bug#36447). */ + h->next = Fcopy_sequence (h->next); + h->index = Fcopy_sequence (h->index); + h->hash = Fcopy_sequence (h->hash); + /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ for (ptrdiff_t i = 0; i < size; ++i) -- 2.20.1 ^ permalink raw reply related [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 17:58 ` Pip Cet @ 2019-07-08 22:18 ` Lars Ingebrigtsen 2019-07-08 22:25 ` Noam Postavsky 2019-07-08 23:22 ` Stefan Monnier 2019-07-08 22:23 ` Michael Heerdegen 2019-07-09 20:15 ` Stefan Monnier 2 siblings, 2 replies; 53+ messages in thread From: Lars Ingebrigtsen @ 2019-07-08 22:18 UTC (permalink / raw) To: Pip Cet; +Cc: Michael Heerdegen, 36447, npostavs, Stefan Monnier Pip Cet <pipcet@gmail.com> writes: > Are you on master? Yup. > Also also, can you please stash copies of the emacs binaries and > emacs.pdmp that fail to work? I did something very silly and overwrote > my compiler with a very slightly different version (after carefully > backing up /usr/local/bin/gcc, too. Too bad that's just the driver > program), and now can no longer reproduce the issue easily. OK; will do the next time it happens (but it's gone again now after yet another "make bootstrap")... > Is there anything I can do to help get this bug fixed on master? Thanks! The change is just the following? + /* These structures may have been purecopied and shared + (bug#36447). */ + h->next = Fcopy_sequence (h->next); + h->index = Fcopy_sequence (h->index); + h->hash = Fcopy_sequence (h->hash); I know nothing about this, but did anybody want to fix this in a different way, or did the discussion just peter our? -- (domestic pets only, the antidote for overdose, milk.) bloggy blog: http://lars.ingebrigtsen.no ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 22:18 ` Lars Ingebrigtsen @ 2019-07-08 22:25 ` Noam Postavsky 2019-07-09 14:00 ` Pip Cet 2019-07-08 23:22 ` Stefan Monnier 1 sibling, 1 reply; 53+ messages in thread From: Noam Postavsky @ 2019-07-08 22:25 UTC (permalink / raw) To: Lars Ingebrigtsen; +Cc: Michael Heerdegen, Stefan Monnier, Pip Cet, 36447 Lars Ingebrigtsen <larsi@gnus.org> writes: > Pip Cet <pipcet@gmail.com> writes: > >> Are you on master? > > Yup. I've switched byte-compile-cond-use-jump-table to nil for now, so you shouldn't be hitting this anymore. >> Also also, can you please stash copies of the emacs binaries and >> emacs.pdmp that fail to work? I did something very silly and overwrote >> my compiler with a very slightly different version (after carefully >> backing up /usr/local/bin/gcc, too. Too bad that's just the driver >> program), and now can no longer reproduce the issue easily. > > OK; will do the next time it happens (but it's gone again now after yet > another "make bootstrap")... > >> Is there anything I can do to help get this bug fixed on master? Thanks! > > The change is just the following? > > + /* These structures may have been purecopied and shared > + (bug#36447). */ > + h->next = Fcopy_sequence (h->next); > + h->index = Fcopy_sequence (h->index); > + h->hash = Fcopy_sequence (h->hash); > > I know nothing about this, but did anybody want to fix this in a > different way, or did the discussion just peter our? There were some suggestions about checking PURE_P or similar to see if the copy is really needed, but then apparently PURE_P doesn't work at all? I've been meaning to check that out, because it seems like there are still a few things which aren't making sense. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 22:25 ` Noam Postavsky @ 2019-07-09 14:00 ` Pip Cet 2019-07-10 3:01 ` Daniel Colascione 0 siblings, 1 reply; 53+ messages in thread From: Pip Cet @ 2019-07-09 14:00 UTC (permalink / raw) To: Noam Postavsky Cc: Michael Heerdegen, Lars Ingebrigtsen, Stefan Monnier, 36447 On Mon, Jul 8, 2019 at 10:25 PM Noam Postavsky <npostavs@gmail.com> wrote: > I've switched byte-compile-cond-use-jump-table to nil for now, so you > shouldn't be hitting this anymore. Thanks. > > I know nothing about this, but did anybody want to fix this in a > > different way, or did the discussion just peter our? > > There were some suggestions about checking PURE_P or similar to see if > the copy is really needed, but then apparently PURE_P doesn't work at > all? I've been meaning to check that out, because it seems like there > are still a few things which aren't making sense. It does seem like we've effectively given up pure space when we switched to pdumper. We still fill it and spend a lot of time making sure not to waste any, but it's then simply copied to the dump along with the rest of the dumped image. It's still kept around in the final executable, so we're wasting quite a bit of disk space on it... Most importantly, I think, the CHECK_IMPURE macro now does nothing (except waste a few cycles), and we can freely setcar what should be purecopied conses. (One thing we might do is introduce immutable conses, and use them for `read', pure space, and in a few other places). ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-09 14:00 ` Pip Cet @ 2019-07-10 3:01 ` Daniel Colascione 2019-07-14 14:06 ` Noam Postavsky 0 siblings, 1 reply; 53+ messages in thread From: Daniel Colascione @ 2019-07-10 3:01 UTC (permalink / raw) To: Pip Cet Cc: Michael Heerdegen, Lars Ingebrigtsen, 36447, Stefan Monnier, Noam Postavsky > On Mon, Jul 8, 2019 at 10:25 PM Noam Postavsky <npostavs@gmail.com> wrote: >> I've switched byte-compile-cond-use-jump-table to nil for now, so you >> shouldn't be hitting this anymore. > > Thanks. > >> > I know nothing about this, but did anybody want to fix this in a >> > different way, or did the discussion just peter our? >> >> There were some suggestions about checking PURE_P or similar to see if >> the copy is really needed, but then apparently PURE_P doesn't work at >> all? I've been meaning to check that out, because it seems like there >> are still a few things which aren't making sense. > > It does seem like we've effectively given up pure space when we > switched to pdumper. We still fill it and spend a lot of time making > sure not to waste any, but it's then simply copied to the dump along > with the rest of the dumped image. We're at least combining identical objects, which is what this bug is about. > It's still kept around in the final > executable, so we're wasting quite a bit of disk space on it... At least the executable pure space isn't paged in. I prefer not to waste disk space, but wasting disk space without wasting memory is lower on my list of priorities than other things. > Most importantly, I think, the CHECK_IMPURE macro now does nothing > (except waste a few cycles), and we can freely setcar what should be > purecopied conses. > > (One thing we might do is introduce immutable conses, and use them for > `read', pure space, and in a few other places). We could use pureness information to relocate all the pure objects in the dump so that we're less likely to take COW faults on them during runtime. Right now, if a pure object and a regular object share a page in the dump and we modify the regular object, we copy the pure object uselessly. Note that we're not modifying these objects for GC, since we keep dump markbits separate from the dump image (as we really should be doing for the heap in general one day). ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-10 3:01 ` Daniel Colascione @ 2019-07-14 14:06 ` Noam Postavsky 0 siblings, 0 replies; 53+ messages in thread From: Noam Postavsky @ 2019-07-14 14:06 UTC (permalink / raw) To: Daniel Colascione Cc: Michael Heerdegen, Lars Ingebrigtsen, 36447, Pip Cet, Stefan Monnier tags 36447 fixed close 36447 quit "Daniel Colascione" <dancol@dancol.org> writes: >> It does seem like we've effectively given up pure space when we >> switched to pdumper. >> It's still kept around in the final >> executable, so we're wasting quite a bit of disk space on it... > > At least the executable pure space isn't paged in. I prefer not to waste > disk space, but wasting disk space without wasting memory is lower on my > list of priorities than other things. IMO, the main problem with the half-keeping of purespace is that it's confusing to people reading the code. At least some comments might be added? Anyway, closing the bug as the main problem has been resolved by now. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 22:18 ` Lars Ingebrigtsen 2019-07-08 22:25 ` Noam Postavsky @ 2019-07-08 23:22 ` Stefan Monnier 1 sibling, 0 replies; 53+ messages in thread From: Stefan Monnier @ 2019-07-08 23:22 UTC (permalink / raw) To: Lars Ingebrigtsen; +Cc: Michael Heerdegen, npostavs, Pip Cet, 36447 > The change is just the following? > > + /* These structures may have been purecopied and shared > + (bug#36447). */ > + h->next = Fcopy_sequence (h->next); > + h->index = Fcopy_sequence (h->index); > + h->hash = Fcopy_sequence (h->hash); Yes. It may not be absolutely perfect, but it's simple enough and should do the trick. Stefan ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 17:58 ` Pip Cet 2019-07-08 22:18 ` Lars Ingebrigtsen @ 2019-07-08 22:23 ` Michael Heerdegen 2019-07-09 15:43 ` Eli Zaretskii 2019-07-09 20:15 ` Stefan Monnier 2 siblings, 1 reply; 53+ messages in thread From: Michael Heerdegen @ 2019-07-08 22:23 UTC (permalink / raw) To: Pip Cet; +Cc: Lars Ingebrigtsen, npostavs, 36447, Stefan Monnier Pip Cet <pipcet@gmail.com> writes: > It is. I think it's worth the risk to push my original fix (reattached > to this message) now, but I don't have commit privileges so it's not > my decision to make. I have been using your latest fix version for some days now without any problems. Since your patch surely doesn't qualify as "tiny" (or whatever the correct wording is), I think you must sign the copyright papers first. You can also just get push access for the repository. Maybe others can assist, I can't even judge if your patch is appropriate (I don't speak C), I only tried it. Michael. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 22:23 ` Michael Heerdegen @ 2019-07-09 15:43 ` Eli Zaretskii 0 siblings, 0 replies; 53+ messages in thread From: Eli Zaretskii @ 2019-07-09 15:43 UTC (permalink / raw) To: Michael Heerdegen; +Cc: 36447, larsi, monnier, pipcet, npostavs > From: Michael Heerdegen <michael_heerdegen@web.de> > Date: Tue, 09 Jul 2019 00:23:09 +0200 > Cc: Lars Ingebrigtsen <larsi@gnus.org>, npostavs@gmail.com, > 36447@debbugs.gnu.org, Stefan Monnier <monnier@iro.umontreal.ca> > > Since your patch surely doesn't qualify as "tiny" (or whatever the > correct wording is), I think you must sign the copyright papers first. He did that already. ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-08 17:58 ` Pip Cet 2019-07-08 22:18 ` Lars Ingebrigtsen 2019-07-08 22:23 ` Michael Heerdegen @ 2019-07-09 20:15 ` Stefan Monnier 2 siblings, 0 replies; 53+ messages in thread From: Stefan Monnier @ 2019-07-09 20:15 UTC (permalink / raw) To: Pip Cet; +Cc: Michael Heerdegen, Lars Ingebrigtsen, npostavs, 36447 > Is there anything I can do to help get this bug fixed on master? Thanks! Pushed, thank you, Stefan ^ permalink raw reply [flat|nested] 53+ messages in thread
* bug#36447: 27.0.50; New "Unknown keyword" errors 2019-07-05 6:35 ` Pip Cet 2019-07-05 7:50 ` Eli Zaretskii @ 2019-07-05 7:55 ` Katsumi Yamaoka 1 sibling, 0 replies; 53+ messages in thread From: Katsumi Yamaoka @ 2019-07-05 7:55 UTC (permalink / raw) To: Pip Cet; +Cc: Michael Heerdegen, Noam Postavsky, 36447 On Fri, 05 Jul 2019 06:35:03 +0000, Pip Cet wrote: > On Fri, Jul 5, 2019 at 1:59 AM Michael Heerdegen >> Yeah, it just happened again, when rebuilding a newer master version. >> And this time, cleaning and make bootstrap didn't fix it. I still use >> the broken build FWIW as it's not as broken as the last time. > Is it still the same symptom, though? Me too, but I can use the latest master as usual by adding (load "custom" nil t) to the beginning of the ~/.emacs file. [...] > Can you do the following? > 1. Run in gdb, with ASLR disabled [...] Unfortunately gdb on Cygwin will probably crash, so I'll wait for someone to fix it. Thanks. In GNU Emacs 27.0.50 (build 1, x86_64-pc-cygwin, GTK+ Version 3.22.28) of 2019-07-05 built on localhost Windowing system distributor 'The Cygwin/X Project', version 11.0.12004000 ^ permalink raw reply [flat|nested] 53+ messages in thread
end of thread, other threads:[~2019-07-14 14:06 UTC | newest] Thread overview: 53+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2019-06-30 18:23 bug#36447: 27.0.50; New "Unknown keyword" errors Michael Heerdegen 2019-06-30 18:43 ` Eli Zaretskii 2019-06-30 21:44 ` Michael Heerdegen 2019-07-01 12:25 ` Noam Postavsky 2019-07-01 13:20 ` Pip Cet 2019-07-01 22:04 ` Michael Heerdegen 2019-07-02 1:59 ` Stefan Kangas 2019-07-02 14:17 ` Eli Zaretskii 2019-07-02 13:29 ` Pip Cet 2019-07-02 15:35 ` Michael Heerdegen 2019-07-02 16:20 ` Noam Postavsky 2019-07-02 22:50 ` Pip Cet 2019-07-03 11:57 ` Pip Cet 2019-07-05 1:59 ` Michael Heerdegen 2019-07-05 6:35 ` Pip Cet 2019-07-05 7:50 ` Eli Zaretskii 2019-07-05 8:12 ` Pip Cet 2019-07-05 8:25 ` Eli Zaretskii 2019-07-05 8:36 ` Pip Cet 2019-07-05 8:41 ` Eli Zaretskii 2019-07-05 9:09 ` Pip Cet 2019-07-05 12:23 ` Robert Pluim 2019-07-05 12:33 ` Eli Zaretskii 2019-07-05 13:41 ` Pip Cet 2019-07-05 18:00 ` Stefan Monnier 2019-07-05 18:07 ` Eli Zaretskii 2019-07-05 20:16 ` Stefan Monnier 2019-07-05 18:57 ` Pip Cet 2019-07-05 19:13 ` Eli Zaretskii 2019-07-05 20:21 ` Stefan Monnier 2019-07-05 21:52 ` Pip Cet 2019-07-05 22:10 ` Stefan Monnier 2019-07-06 6:45 ` Eli Zaretskii 2019-07-06 15:08 ` Pip Cet 2019-07-09 21:05 ` Stefan Monnier 2019-07-10 2:38 ` Eli Zaretskii 2019-07-10 3:19 ` Daniel Colascione 2019-07-10 15:01 ` Pip Cet 2019-07-10 17:16 ` Daniel Colascione 2019-07-10 20:14 ` Pip Cet 2019-07-06 15:32 ` Michael Heerdegen 2019-07-08 17:30 ` Lars Ingebrigtsen 2019-07-08 17:58 ` Pip Cet 2019-07-08 22:18 ` Lars Ingebrigtsen 2019-07-08 22:25 ` Noam Postavsky 2019-07-09 14:00 ` Pip Cet 2019-07-10 3:01 ` Daniel Colascione 2019-07-14 14:06 ` Noam Postavsky 2019-07-08 23:22 ` Stefan Monnier 2019-07-08 22:23 ` Michael Heerdegen 2019-07-09 15:43 ` Eli Zaretskii 2019-07-09 20:15 ` Stefan Monnier 2019-07-05 7:55 ` Katsumi Yamaoka
Code repositories for project(s) associated with this external index https://git.savannah.gnu.org/cgit/emacs.git https://git.savannah.gnu.org/cgit/emacs/org-mode.git This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.