From 1d40c60265c34caaf9d8f5ad0c66322e7281902d Mon Sep 17 00:00:00 2001 From: Matt Armstrong Date: Sun, 9 Oct 2022 19:42:14 -0700 Subject: [PATCH 2/2] Add ./configure --enable-checking=overlays This enables expensive validation checks of overlay data structures. Some are too expensive to turn on by default with --enable-checking=all since they turn O(log N) algorithms into O(N) algorithms. * src/itree.c (ENABLE_OVERLAY_CHECKING): define to 0 if undefined. (struct check_subtree_result): new struct. (check_subtree): renamed from recurse_check_tree, modified to use check_subtree_result and check more invariants. (check_tree): use check_subtree_result. * configure.ac: add support for ENABLE_OVERLAY_CHECKING. --- configure.ac | 11 +++- src/itree.c | 164 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 148 insertions(+), 27 deletions(-) diff --git a/configure.ac b/configure.ac index 8ba52a492b..42a55c7572 100644 --- a/configure.ac +++ b/configure.ac @@ -582,7 +582,7 @@ AC_DEFUN enable only specific categories of checks. Categories are: all,yes,no. Flags are: stringbytes, stringoverrun, stringfreelist, - structs, glyphs])], + structs, glyphs, overlays])], [ac_checking_flags="${enableval}"],[]) IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS="$IFS," CHECK_STRUCTS=false @@ -596,7 +596,8 @@ AC_DEFUN ac_gc_check_stringbytes= ; ac_gc_check_string_overrun= ; ac_gc_check_string_free_list= ; - ac_glyphs_debug= ;; + ac_glyphs_debug= ; + ac_enable_overlay_checking= ;; all) ac_enable_checking=1 ; CHECK_STRUCTS=true ac_gc_check_stringbytes=1 ; @@ -609,6 +610,7 @@ AC_DEFUN stringfreelist) ac_gc_check_string_free_list=1 ;; structs) CHECK_STRUCTS=true ;; glyphs) ac_glyphs_debug=1 ;; + overlays) ac_enable_overlay_checking=1 ;; *) AC_MSG_ERROR([unknown check category $check]) ;; esac done @@ -645,6 +647,11 @@ AC_DEFUN AC_DEFINE([GLYPH_DEBUG], [1], [Define this to enable glyphs debugging code.]) fi +if test x$ac_enable_overlay_checking != x ; then + AC_DEFINE([ENABLE_OVERLAY_CHECKING], [1], +[Define this temporarily to hunt a bug. If defined, expensive + invariant checking of the overlay data structures is enabled.]) +fi dnl The name of this option is unfortunate. It predates, and has no dnl relation to, the "sampling-based elisp profiler" added in 24.3. diff --git a/src/itree.c b/src/itree.c index bbab70dac7..31a7e01090 100644 --- a/src/itree.c +++ b/src/itree.c @@ -144,6 +144,10 @@ Copyright (C) 2017-2022 Free Software Foundation, Inc. static struct interval_generator* interval_generator_create (struct interval_tree *); static void interval_tree_insert (struct interval_tree *, struct interval_node *); +#ifndef ENABLE_OVERLAY_CHECKING +#define ENABLE_OVERLAY_CHECKING 0 +#endif + /* The sentinel node, the null node. */ struct interval_node itree_null; @@ -205,14 +209,44 @@ itree_init (void) iter = interval_generator_create (NULL); } -static ptrdiff_t -recurse_check_tree (struct interval_node *node, uintmax_t tree_otick, - ptrdiff_t offset, ptrdiff_t min_begin, - ptrdiff_t max_begin, intmax_t *size) +struct check_subtree_result +{ + /* Were all nodes visited? */ + bool complete; + + /* Computed node count of the tree. */ + int size; + + /* Computed limit of the tree (max END). */ + ptrdiff_t limit; + + /* Computed black height of the tree (count of black nodes from the + bottom up to the root). */ + int black_height; +}; + +static struct check_subtree_result +check_subtree (struct interval_node *node, uintmax_t tree_otick, + int max_depth, ptrdiff_t offset, ptrdiff_t min_begin, + ptrdiff_t max_begin) { + struct check_subtree_result result = { .complete = false, + .size = 0, + .limit = PTRDIFF_MIN, + .black_height = 0 }; if (node == ITREE_NULL) - return PTRDIFF_MIN; - ++*size; + { + /* Every nil node of a Red-Black tree is black */ + result.black_height = 1; + result.complete = true; + return result; + }; + + if (max_depth == 0) + { + result.complete = false; + return result; + } /* Validate structure. */ eassert ( @@ -221,12 +255,23 @@ recurse_check_tree (struct interval_node *node, uintmax_t tree_otick, eassert (node->left == ITREE_NULL || node->left->parent == node); eassert (node->right == ITREE_NULL || node->right->parent == node); - /* We don't check the RB invariants here (neither the absence of - red+red nor the equal-black-depth), so that we can use this check - even while the tree is temporarily breaking some of those invarints. */ - /* Red nodes cannot have red parents. */ - /* eassert (node->parent == ITREE_NULL - || !(node->red && node->parent->red)); */ + /* We don't normally check the RB invariants here (neither the + absence of red+red nor the equal-black-depth), so that we can use + this check even while the tree is temporarily breaking some of + those invarints. You can enable them if you want. */ + if (false) + { + /* If a node is red then both of its children are black. Red + nodes cannot have red parents. */ + if (node->red) + { + eassert (node->left == ITREE_NULL + || node->left->red == false); + eassert (node->right == ITREE_NULL + || node->right->red == false); + eassert (node->parent == ITREE_NULL || !node->parent->red); + } + } eassert (node->offset == 0 || node->otick < tree_otick); @@ -240,18 +285,44 @@ recurse_check_tree (struct interval_node *node, uintmax_t tree_otick, eassert (begin <= max_begin); eassert (end <= limit); - ptrdiff_t left_limit - = recurse_check_tree (node->left, tree_otick, offset, min_begin, - begin, size); - ptrdiff_t right_limit - = recurse_check_tree (node->right, tree_otick, offset, begin, - max_begin, size); - eassert (left_limit <= limit); - eassert (right_limit <= limit); - eassert (limit == max (end, max (left_limit, right_limit))); - return limit; + struct check_subtree_result left_result + = check_subtree (node->left, tree_otick, max_depth - 1, offset, + min_begin, begin); + struct check_subtree_result right_result + = check_subtree (node->right, tree_otick, max_depth - 1, offset, + begin, max_begin); + + eassert (left_result.limit <= limit); + eassert (right_result.limit <= limit); + + result.complete = left_result.complete && right_result.complete; + if (result.complete) + { + /* Every path from a node to a descendent leaf contains the same + number of black nodes. Often said this way: all nodes have + the same "black height". */ + eassert (left_result.black_height == right_result.black_height); + result.black_height + = (node->red ? 0 : 1) + left_result.black_height; + + result.size = 1 + left_result.size + right_result.size; + result.limit + = max (end, max (left_result.limit, right_result.limit)); + + eassert (limit == result.limit); + } + + return result; } +/* Validate invariants for TREE. + + This runs in constant time when ENABLE_OVERLAY_CHECKING is 0 + (i.e. Emacs is not configured with + "--enable_checking=yes,overlays"). In this mode it can't check all + the invariants. When ENABLE_OVERLAY_CHECKING is 1 it checks the + entire tree and validates all invariants. +*/ static bool check_tree (struct interval_tree *tree) { @@ -260,9 +331,52 @@ check_tree (struct interval_tree *tree) if (tree->root == ITREE_NULL) return true; - intmax_t size = 0; - recurse_check_tree (tree->root, tree->otick, 0, - PTRDIFF_MIN, PTRDIFF_MAX, &size); + /* Establish an upper bound on the height of the tree. + + A red-black tree with n nodes has height at least log2(n + 1) but + no more than 2 * log2(n + 1). A red-black tree with height h has + at least pow(2, h / 2) - 1 nodes but no more than pow(2, h) - 1. + + On 64 bit architectures a red black tree has at least 2 pointers, + 8 bytes each. A 64 bit architecture has a theoretical maximum of + 2 ^ 64 addressable bytes. + + Putting this together, the upper bound for the height of any + red-black tree on a 64 bit architecture is 120. + + 2 * log2((2 ^ 64 bytes) / (2 * 8) bytes + 1) => 120 + + In truth, the memory overhead for our tree nodes is more than two + pointers, but even if we say each has 1 KiB overhead the bound + only drops to 108, which is an unimportant difference here. + */ + const int max_height = 120; + + /* + When ENABLE_OVERLAY_CHECKING is true, check the invariants of the + entire tree. + + Otherwise limit invariant checking to at most 257 nodes. When so + limited some of the invariants cannot be asserted, but the + asymtotic run time of the tree algorithms is preserved, so it is + practical to enable them whenever ENABLE_CHECKING (without + ENABLE_OVERLAY_CHECKING) is defined. + + Note: a tree of height 8 can have at most 2^8-1 or 257 nodes. + */ + const int max_depth + = (ENABLE_OVERLAY_CHECKING || tree->size <= 257) ? max_height : 8; + + struct interval_node *node = tree->root; + struct check_subtree_result result + = check_subtree (node, tree->otick, max_depth, node->offset, + PTRDIFF_MIN, PTRDIFF_MAX); + if (ENABLE_OVERLAY_CHECKING) + eassert (result.complete); + if (result.complete) + eassert (result.size == tree->size); + + /* The only way this function fails is eassert(). */ return true; } -- 2.35.1