* [PATCH 3/9] xap_helper.h: use khashl.h instead of hsearch(3)
2024-05-19 21:55 [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
2024-05-19 21:55 ` [PATCH 1/9] config: dedupe ibx->{newsgroup} Eric Wong
2024-05-19 21:55 ` [PATCH 2/9] xap_helper: key search instances by -Q params, too Eric Wong
@ 2024-05-19 21:55 ` Eric Wong
2024-05-20 18:59 ` [PATCH 10/9] xap_helper.h: fix CPP error on non-glibc Eric Wong
2024-05-19 21:55 ` [PATCH 4/9] xap_helper.h: use xcalloc to simplify error checking Eric Wong
` (5 subsequent siblings)
8 siblings, 1 reply; 11+ messages in thread
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
To: meta
hsearch(3) and friends are just too horrid of APIs and subject
to fatal problems due to system-dependent ENTRY.key use of
strdup(3). So replace it with khashl (which is a newer, smaller
version of the widely-used khash in git.git).
We'll also be able to use khashl in the future for
the FUSE shim if liburcu isn't available.
---
MANIFEST | 1 +
lib/PublicInbox/khashl.h | 502 +++++++++++++++++++++++++++++++++++
lib/PublicInbox/xap_helper.h | 55 ++--
lib/PublicInbox/xh_cidx.h | 79 ++++--
4 files changed, 590 insertions(+), 47 deletions(-)
create mode 100644 lib/PublicInbox/khashl.h
diff --git a/MANIFEST b/MANIFEST
index fb175e5f..5796e05b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -385,6 +385,7 @@ lib/PublicInbox/Xapcmd.pm
lib/PublicInbox/XhcMset.pm
lib/PublicInbox/XhcMsetIterator.pm
lib/PublicInbox/gcf2_libgit2.h
+lib/PublicInbox/khashl.h
lib/PublicInbox/xap_helper.h
lib/PublicInbox/xh_cidx.h
lib/PublicInbox/xh_mset.h
diff --git a/lib/PublicInbox/khashl.h b/lib/PublicInbox/khashl.h
new file mode 100644
index 00000000..170b81ff
--- /dev/null
+++ b/lib/PublicInbox/khashl.h
@@ -0,0 +1,502 @@
+/* The MIT License
+
+ Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk>
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#ifndef __AC_KHASHL_H
+#define __AC_KHASHL_H
+
+#define AC_VERSION_KHASHL_H "0.2"
+
+typedef uint32_t khint32_t;
+typedef uint64_t khint64_t;
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define kh_inline inline
+#define KH_LOCAL static kh_inline
+
+#ifndef kcalloc
+#define kcalloc(N,Z) xcalloc(N,Z)
+#endif
+#ifndef kfree
+#define kfree(P) free(P)
+#endif
+
+/****************************
+ * Simple private functions *
+ ****************************/
+
+#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U)
+#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU))
+#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU)))
+
+#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
+
+static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
+
+/*******************
+ * Hash table base *
+ *******************/
+
+#define __KHASHL_TYPE(HType, khkey_t) \
+ typedef struct HType { \
+ khint_t bits, count; \
+ khint32_t *used; \
+ khkey_t *keys; \
+ } HType;
+
+#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \
+ extern HType *prefix##_init(void); \
+ extern void prefix##_destroy(HType *h); \
+ extern void prefix##_clear(HType *h); \
+ extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \
+ extern void prefix##_resize(HType *h, khint_t new_n_buckets); \
+ extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \
+ extern void prefix##_del(HType *h, khint_t k);
+
+#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+ SCOPE HType *prefix##_init(void) { \
+ return (HType*)kcalloc(1, sizeof(HType)); \
+ } \
+ SCOPE void prefix##_release(HType *h) { \
+ kfree((void *)h->keys); kfree(h->used); \
+ } \
+ SCOPE void prefix##_destroy(HType *h) { \
+ if (!h) return; \
+ prefix##_release(h); \
+ kfree(h); \
+ } \
+ SCOPE void prefix##_clear(HType *h) { \
+ if (h && h->used) { \
+ khint_t n_buckets = (khint_t)1U << h->bits; \
+ memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \
+ h->count = 0; \
+ } \
+ }
+
+#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \
+ khint_t i, last, n_buckets, mask; \
+ if (!h->keys) return 0; \
+ n_buckets = (khint_t)1U << h->bits; \
+ mask = n_buckets - 1U; \
+ i = last = __kh_h2b(hash, h->bits); \
+ while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+ i = (i + 1U) & mask; \
+ if (i == last) return n_buckets; \
+ } \
+ return !__kh_used(h->used, i)? n_buckets : i; \
+ } \
+ SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); }
+
+#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \
+ khint32_t *new_used = NULL; \
+ khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
+ while ((x >>= 1) != 0) ++j; \
+ if (new_n_buckets & (new_n_buckets - 1)) ++j; \
+ new_bits = j > 2? j : 2; \
+ new_n_buckets = (khint_t)1U << new_bits; \
+ if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \
+ new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \
+ n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+ if (n_buckets < new_n_buckets) { /* expand */ \
+ h->keys = (khkey_t *)xreallocarray(h->keys, \
+ new_n_buckets, sizeof(khkey_t)); \
+ } /* otherwise shrink */ \
+ new_mask = new_n_buckets - 1; \
+ for (j = 0; j != n_buckets; ++j) { \
+ khkey_t key; \
+ if (!__kh_used(h->used, j)) continue; \
+ key = h->keys[j]; \
+ __kh_set_unused(h->used, j); \
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+ khint_t i; \
+ i = __kh_h2b(__hash_fn(key), new_bits); \
+ while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \
+ __kh_set_used(new_used, i); \
+ if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \
+ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+ __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \
+ } else { /* write the element and jump out of the loop */ \
+ h->keys[i] = key; \
+ break; \
+ } \
+ } \
+ } \
+ if (n_buckets > new_n_buckets) /* shrink the hash table */ \
+ h->keys = (khkey_t *)xreallocarray(h->keys, \
+ new_n_buckets, sizeof(khkey_t)); \
+ kfree(h->used); /* free the working space */ \
+ h->used = new_used, h->bits = new_bits; \
+ }
+
+#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \
+ khint_t n_buckets, i, last, mask; \
+ n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+ *absent = -1; \
+ if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \
+ prefix##_resize(h, n_buckets + 1U); \
+ n_buckets = (khint_t)1U<<h->bits; \
+ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+ mask = n_buckets - 1; \
+ i = last = __kh_h2b(hash, h->bits); \
+ while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+ i = (i + 1U) & mask; \
+ if (i == last) break; \
+ } \
+ if (!__kh_used(h->used, i)) { /* not present at all */ \
+ h->keys[i] = *key; \
+ __kh_set_used(h->used, i); \
+ ++h->count; \
+ *absent = 1; \
+ } else *absent = 0; /* Don't touch h->keys[i] if present */ \
+ return i; \
+ } \
+ SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); }
+
+#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \
+ SCOPE int prefix##_del(HType *h, khint_t i) { \
+ khint_t j = i, k, mask, n_buckets; \
+ if (!h->keys) return 0; \
+ n_buckets = (khint_t)1U<<h->bits; \
+ mask = n_buckets - 1U; \
+ while (1) { \
+ j = (j + 1U) & mask; \
+ if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \
+ k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \
+ if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \
+ h->keys[i] = h->keys[j], i = j; \
+ } \
+ __kh_set_unused(h->used, i); \
+ --h->count; \
+ return 1; \
+ }
+
+#define KHASHL_DECLARE(HType, prefix, khkey_t) \
+ __KHASHL_TYPE(HType, khkey_t) \
+ __KHASHL_PROTOTYPES(HType, prefix, khkey_t)
+
+#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_TYPE(HType, khkey_t) \
+ __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+ __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn)
+
+#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct HType { \
+ khint64_t count:54, bits:8; \
+ HType##_sub *sub; \
+ } HType; \
+ SCOPE HType *prefix##_init_sub(HType *g, size_t bits) { \
+ g->bits = bits; \
+ g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \
+ return g; \
+ } \
+ SCOPE HType *prefix##_init(void) { \
+ HType *g; \
+ g = (HType*)kcalloc(1, sizeof(*g)); \
+ return prefix##_init_sub(g, 0); /* unsure about default */ \
+ } \
+ SCOPE void prefix##_release(HType *g) { \
+ int t; \
+ for (t = 0; t < 1<<g->bits; ++t) \
+ prefix##_sub_release(&g->sub[t]); \
+ kfree(g->sub); \
+ } \
+ SCOPE void prefix##_destroy(HType *g) { \
+ if (!g) return; \
+ prefix##_release(g); \
+ kfree(g); \
+ } \
+ SCOPE void prefix##_clear(HType *g) { \
+ int t; \
+ if (!g) return; \
+ for (t = 0; t < 1<<g->bits; ++t) \
+ prefix##_sub_clear(&g->sub[t]); \
+ } \
+ SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \
+ khint_t hash, low, ret; \
+ kh_ensitr_t r; \
+ HType##_sub *h; \
+ hash = __hash_fn(*key); \
+ low = hash & ((1U<<g->bits) - 1); \
+ h = &g->sub[low]; \
+ ret = prefix##_sub_getp_core(h, key, hash); \
+ if (ret >= kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \
+ else r.sub = low, r.pos = ret; \
+ return r; \
+ } \
+ SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \
+ SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \
+ khint_t hash, low, ret; \
+ kh_ensitr_t r; \
+ HType##_sub *h; \
+ hash = __hash_fn(*key); \
+ low = hash & ((1U<<g->bits) - 1); \
+ h = &g->sub[low]; \
+ ret = prefix##_sub_putp_core(h, key, hash, absent); \
+ if (*absent) ++g->count; \
+ if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \
+ else r.sub = low, r.pos = ret; \
+ return r; \
+ } \
+ SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \
+ SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \
+ HType##_sub *h = &g->sub[itr.sub]; \
+ int ret; \
+ ret = prefix##_sub_del(h, itr.pos); \
+ if (ret) --g->count; \
+ return ret; \
+ }
+
+/*****************************
+ * More convenient interface *
+ *****************************/
+
+#define __kh_packed /* noop, we use -Werror=address-of-packed-member */
+#define __kh_cached_hash(x) ((x).hash)
+
+#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \
+ static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \
+ SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \
+ SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \
+
+#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+ static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+ SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \
+ SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+
+#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \
+ static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); }
+
+#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \
+ static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+ KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \
+ SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \
+ SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); }
+
+#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+ typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+ static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+ static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+ KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+ SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+ SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \
+ SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+ SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \
+ SCOPE void prefix##_resize(HType *h, khint_t ignore) { /* noop */ } \
+ SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+ SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \
+ SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+
+/**************************
+ * Public macro functions *
+ **************************/
+
+#define kh_bucket(h, x) ((h)->keys[x])
+
+/*! @function
+ @abstract Get the number of elements in the hash table
+ @param h Pointer to the hash table
+ @return Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->count)
+
+#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U)
+
+/*! @function
+ @abstract Get the end iterator
+ @param h Pointer to the hash table
+ @return The end iterator [khint_t]
+ */
+#define kh_end(h) kh_capacity(h)
+
+/*! @function
+ @abstract Get key given an iterator
+ @param h Pointer to the hash table
+ @param x Iterator to the bucket [khint_t]
+ @return Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x].key)
+
+/*! @function
+ @abstract Get value given an iterator
+ @param h Pointer to the hash table
+ @param x Iterator to the bucket [khint_t]
+ @return Value [type of values]
+ @discussion For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->keys[x].val)
+
+/*! @function
+ @abstract Alias of kh_val()
+ */
+#define kh_value(h, x) kh_val(h, x)
+
+/*! @function
+ @abstract Test whether a bucket contains data.
+ @param h Pointer to the hash table
+ @param x Iterator to the bucket [khint_t]
+ @return 1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) __kh_used((h)->used, (x))
+
+#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_is_end(x) ((x).pos == (khint_t)-1)
+#define kh_ens_size(g) ((g)->count)
+
+/**************************************
+ * Common hash and equality functions *
+ **************************************/
+
+#define kh_eq_generic(a, b) ((a) == (b))
+#define kh_eq_str(a, b) (strcmp((a), (b)) == 0)
+#define kh_hash_dummy(x) ((khint_t)(x))
+
+static kh_inline khint_t kh_hash_uint32(khint_t key) {
+ key += ~(key << 15);
+ key ^= (key >> 10);
+ key += (key << 3);
+ key ^= (key >> 6);
+ key += ~(key << 11);
+ key ^= (key >> 16);
+ return key;
+}
+
+static kh_inline khint_t kh_hash_uint64(khint64_t key) {
+ key = ~key + (key << 21);
+ key = key ^ key >> 24;
+ key = (key + (key << 3)) + (key << 8);
+ key = key ^ key >> 14;
+ key = (key + (key << 2)) + (key << 4);
+ key = key ^ key >> 28;
+ key = key + (key << 31);
+ return (khint_t)key;
+}
+
+#define KH_FNV_SEED 11
+
+static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */
+ khint_t h = KH_FNV_SEED ^ 2166136261U;
+ const unsigned char *t = (const unsigned char*)s;
+ for (; *t; ++t)
+ h ^= *t, h *= 16777619;
+ return h;
+}
+
+static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) {
+ khint_t h = KH_FNV_SEED ^ 2166136261U;
+ int i;
+ for (i = 0; i < len; ++i)
+ h ^= s[i], h *= 16777619;
+ return h;
+}
+
+/*! @function
+ @abstract Get the start iterator
+ @param h Pointer to the hash table
+ @return The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+ @abstract Iterate over the entries in the hash table
+ @param h Pointer to the hash table
+ @param kvar Variable to which key will be assigned
+ @param vvar Variable to which value will be assigned
+ @param code Block of code to execute
+ */
+#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
+ if (!kh_exist(h,__i)) continue; \
+ (kvar) = kh_key(h,__i); \
+ (vvar) = kh_val(h,__i); \
+ code; \
+ } }
+
+#define kh_ens_foreach(g, kvar, vvar, code) do { \
+ size_t t; \
+ for (t = 0; t < 1<<g->bits; ++t) \
+ kh_foreach(&g->sub[t], kvar, vvar, code); \
+} while (0)
+
+#define kh_ens_foreach_value(g, vvar, code) do { \
+ size_t t; \
+ for (t = 0; t < 1<<g->bits; ++t) \
+ kh_foreach_value(&g->sub[t], vvar, code); \
+} while (0)
+
+/*! @function
+ @abstract Iterate over the values in the hash table
+ @param h Pointer to the hash table
+ @param vvar Variable to which value will be assigned
+ @param code Block of code to execute
+ */
+#define kh_foreach_value(h, vvar, code) { khint_t __i; \
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
+ if (!kh_exist(h,__i)) continue; \
+ (vvar) = kh_val(h,__i); \
+ code; \
+ } }
+
+#endif /* __AC_KHASHL_H */
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 8bfd7ab6..92d3d12f 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -7,7 +7,7 @@
* this is not linked to Perl in any way.
* C (not C++) is used as much as possible to lower the contribution
* barrier for hackers who mainly know C (this includes the maintainer).
- * Yes, that means we use C stdlib stuff like hsearch and open_memstream
+ * Yes, that means we use C stdlib stuff like open_memstream
* instead their equivalents in the C++ stdlib :P
* Everything here is an unstable internal API of public-inbox and
* NOT intended for ordinary users; only public-inbox hackers
@@ -15,6 +15,9 @@
#ifndef _ALL_SOURCE
# define _ALL_SOURCE
#endif
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
#if defined(__NetBSD__) && !defined(_OPENBSD_SOURCE) // for reallocarray(3)
# define _OPENBSD_SOURCE
#endif
@@ -83,6 +86,36 @@
#define ABORT(...) do { warnx(__VA_ARGS__); abort(); } while (0)
#define EABORT(...) do { warn(__VA_ARGS__); abort(); } while (0)
+static void *xcalloc(size_t nmemb, size_t size)
+{
+ void *ret = calloc(nmemb, size);
+ if (!ret) EABORT("calloc(%zu, %zu)", nmemb, size);
+ return ret;
+}
+
+#if defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
+ MY_VER(__GLIBC__, __GLIBC_MINOR__, 0) >= MY_VER(2, 28, 0)
+# define HAVE_REALLOCARRAY 1
+#elif (defined(__OpenBSD__) || defined(__DragonFly__) || \
+ defined(__FreeBSD__) || defined(__NetBSD__)
+# define HAVE_REALLOCARRAY 1
+#endif
+
+static void *xreallocarray(void *ptr, size_t nmemb, size_t size)
+{
+#ifdef HAVE_REALLOCARRAY
+ void *ret = reallocarray(ptr, nmemb, size);
+#else // can't rely on __builtin_mul_overflow in gcc 4.x :<
+ void *ret = NULL;
+ if (nmemb && size > SIZE_MAX / nmemb)
+ errno = ENOMEM;
+ else
+ ret = realloc(ptr, nmemb * size);
+#endif
+ if (!ret) EABORT("reallocarray(..., %zu, %zu)", nmemb, size);
+ return ret;
+}
+
// sock_fd is modified in signal handler, yes, it's SOCK_SEQPACKET
static volatile int sock_fd = STDIN_FILENO;
static sigset_t fullset, workerset;
@@ -374,25 +407,6 @@ static size_t off2size(off_t n)
return (size_t)n;
}
-static char *hsearch_enter_key(char *s)
-{
-#if defined(__OpenBSD__) || defined(__DragonFly__)
- // hdestroy frees each key on some platforms,
- // so give it something to free:
- char *ret = strdup(s);
- if (!ret) err(EXIT_FAILURE, "strdup");
- return ret;
-// AFAIK there's no way to detect musl, assume non-glibc Linux is musl:
-#elif defined(__GLIBC__) || defined(__linux__) || \
- defined(__FreeBSD__) || defined(__NetBSD__)
- // do nothing on these platforms
-#else
-#warning untested platform detected, unsure if hdestroy(3) frees keys
-#warning contact us at meta@public-inbox.org if you get segfaults
-#endif
- return s;
-}
-
// for test usage only, we need to ensure the compiler supports
// __cleanup__ when exceptions are thrown
struct inspect { struct req *req; };
@@ -421,6 +435,7 @@ static bool cmd_test_sleep(struct req *req)
for (;;) poll(NULL, 0, 10);
return false;
}
+#include "khashl.h"
#include "xh_mset.h" // read-only (WWW, IMAP, lei) stuff
#include "xh_cidx.h" // CodeSearchIdx.pm stuff
diff --git a/lib/PublicInbox/xh_cidx.h b/lib/PublicInbox/xh_cidx.h
index 311ca05f..8cc6a845 100644
--- a/lib/PublicInbox/xh_cidx.h
+++ b/lib/PublicInbox/xh_cidx.h
@@ -3,11 +3,38 @@
// This file is only intended to be included by xap_helper.h
// it implements pieces used by CodeSearchIdx.pm
+// TODO: consider making PublicInbox::CodeSearchIdx emit binary
+// (20 or 32-bit) OIDs instead of ASCII hex. It would require
+// more code in both Perl and C++, though...
+
+// assumes trusted data from same host
+static inline unsigned int hex2uint(char c)
+{
+ switch (c) {
+ case '0' ... '9': return c - '0';
+ case 'a' ... 'f': return c - 'a' + 10;
+ default: return 0xff; // oh well...
+ }
+}
+
+// assumes trusted data from same host
+static kh_inline khint_t sha_hex_hash(const char *hex)
+{
+ khint_t ret = 0;
+
+ for (size_t shift = 32; shift; )
+ ret |= hex2uint(*hex++) << (shift -= 4);
+
+ return ret;
+}
+
+KHASHL_CMAP_INIT(KH_LOCAL, root2id_map, root2id,
+ const char *, const char *,
+ sha_hex_hash, kh_eq_str)
+
static void term_length_extract(struct req *req)
{
- req->lenv = (size_t *)calloc(req->pfxc, sizeof(size_t));
- if (!req->lenv)
- EABORT("lenv = calloc(%d %zu)", req->pfxc, sizeof(size_t));
+ req->lenv = (size_t *)xcalloc(req->pfxc, sizeof(size_t));
for (int i = 0; i < req->pfxc; i++) {
char *pfx = req->pfxv[i];
// extract trailing digits as length:
@@ -101,6 +128,7 @@ struct dump_roots_tmp {
void *mm_ptr;
char **entries;
struct fbuf wbuf;
+ root2id_map *root2id;
int root2off_fd;
};
@@ -110,7 +138,8 @@ static void dump_roots_ensure(void *ptr)
struct dump_roots_tmp *drt = (struct dump_roots_tmp *)ptr;
if (drt->root2off_fd >= 0)
xclose(drt->root2off_fd);
- hdestroy(); // idempotent
+ if (drt->root2id)
+ root2id_cm_destroy(drt->root2id);
size_t size = off2size(drt->sb.st_size);
if (drt->mm_ptr && munmap(drt->mm_ptr, size))
EABORT("BUG: munmap(%p, %zu)", drt->mm_ptr, size);
@@ -118,23 +147,21 @@ static void dump_roots_ensure(void *ptr)
fbuf_ensure(&drt->wbuf);
}
-static bool root2offs_str(struct fbuf *root_offs, Xapian::Document *doc)
+static bool root2offs_str(struct dump_roots_tmp *drt,
+ struct fbuf *root_offs, Xapian::Document *doc)
{
Xapian::TermIterator cur = doc->termlist_begin();
Xapian::TermIterator end = doc->termlist_end();
- ENTRY e, *ep;
fbuf_init(root_offs);
for (cur.skip_to("G"); cur != end; cur++) {
std::string tn = *cur;
if (!starts_with(&tn, "G", 1)) break;
- union { const char *in; char *out; } u;
- u.in = tn.c_str() + 1;
- e.key = u.out;
- ep = hsearch(e, FIND);
- if (!ep) ABORT("hsearch miss `%s'", e.key);
- // ep->data is a NUL-terminated string matching /[0-9]+/
+ khint_t i = root2id_get(drt->root2id, tn.c_str() + 1);
+ if (i >= kh_end(drt->root2id))
+ ABORT("kh get miss `%s'", tn.c_str() + 1);
fputc(' ', root_offs->fp);
- fputs((const char *)ep->data, root_offs->fp);
+ // kh_val(...) is a NUL-terminated string matching /[0-9]+/
+ fputs(kh_val(drt->root2id, i), root_offs->fp);
}
fputc('\n', root_offs->fp);
ERR_CLOSE(root_offs->fp, EXIT_FAILURE); // ENOMEM
@@ -198,7 +225,7 @@ static enum exc_iter dump_roots_iter(struct req *req,
CLEANUP_FBUF struct fbuf root_offs = {}; // " $ID0 $ID1 $IDx..\n"
try {
Xapian::Document doc = i->get_document();
- if (!root2offs_str(&root_offs, &doc))
+ if (!root2offs_str(drt, &root_offs, &doc))
return ITER_ABORT; // bad request, abort
for (int p = 0; p < req->pfxc; p++)
dump_roots_term(req, p, drt, &root_offs, &doc);
@@ -226,8 +253,7 @@ static bool cmd_dump_roots(struct req *req)
if (fstat(drt.root2off_fd, &drt.sb)) // ENOMEM?
err(EXIT_FAILURE, "fstat(%s)", root2off_file);
// each entry is at least 43 bytes ({OIDHEX}\0{INT}\0),
- // so /32 overestimates the number of expected entries by
- // ~%25 (as recommended by Linux hcreate(3) manpage)
+ // so /32 overestimates the number of expected entries
size_t size = off2size(drt.sb.st_size);
size_t est = (size / 32) + 1; //+1 for "\0" termination
drt.mm_ptr = mmap(NULL, size, PROT_READ,
@@ -236,20 +262,19 @@ static bool cmd_dump_roots(struct req *req)
err(EXIT_FAILURE, "mmap(%zu, %s)", size, root2off_file);
size_t asize = est * 2;
if (asize < est) ABORT("too many entries: %zu", est);
- drt.entries = (char **)calloc(asize, sizeof(char *));
- if (!drt.entries)
- err(EXIT_FAILURE, "calloc(%zu * 2, %zu)", est, sizeof(char *));
+ drt.entries = (char **)xcalloc(asize, sizeof(char *));
size_t tot = split2argv(drt.entries, (char *)drt.mm_ptr, size, asize);
if (tot <= 0) return false; // split2argv already warned on error
- if (!hcreate(est))
- err(EXIT_FAILURE, "hcreate(%zu)", est);
+ drt.root2id = root2id_init();
+ root2id_cm_resize(drt.root2id, est);
for (size_t i = 0; i < tot; ) {
- ENTRY e;
- e.key = hsearch_enter_key(drt.entries[i++]); // dies on ENOMEM
- e.data = drt.entries[i++];
- if (!hsearch(e, ENTER))
- err(EXIT_FAILURE, "hsearch(%s => %s, ENTER)", e.key,
- (const char *)e.data);
+ int absent;
+ const char *key = drt.entries[i++];
+ khint_t k = root2id_put(drt.root2id, key, &absent);
+ if (!absent)
+ err(EXIT_FAILURE, "put(%s => %s, ENTER)",
+ key, drt.entries[i]);
+ kh_val(drt.root2id, k) = drt.entries[i++];
}
req->asc = true;
req->sort_col = -1;
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 6/9] xap_helper: expire DB handles when FD table is near full
2024-05-19 21:55 [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
` (4 preceding siblings ...)
2024-05-19 21:55 ` [PATCH 5/9] xap_helper.h: memoize Xapian handles with khashl Eric Wong
@ 2024-05-19 21:55 ` Eric Wong
2024-05-19 21:55 ` [PATCH 7/9] xap_helper: drop DB handles on EMFILE/ENFILE/etc Eric Wong
` (2 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
To: meta
For long-lived daemons across config reloads, we shouldn't keep
Xapian DBs open forever under FD pressure. So estimate the
number of FDs we need per-shard and start clearing some out
if we have too many open.
While we're at it, hoist out our ulimit_n helper and share it
across extindex and the Perl XapHelper implementation.
---
lib/PublicInbox/ExtSearchIdx.pm | 8 +----
lib/PublicInbox/Search.pm | 16 +++++++++
lib/PublicInbox/XapHelper.pm | 18 +++++++---
lib/PublicInbox/XapHelperCxx.pm | 1 +
lib/PublicInbox/xap_helper.h | 58 +++++++++++++++++++++++++--------
t/xap_helper.t | 23 +++++++++++++
6 files changed, 100 insertions(+), 24 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 883dbea3..934197c0 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -543,13 +543,7 @@ sub _ibx_for ($$$) {
sub _fd_constrained ($) {
my ($self) = @_;
$self->{-fd_constrained} //= do {
- my $soft;
- if (eval { require BSD::Resource; 1 }) {
- my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
- ($soft, undef) = BSD::Resource::getrlimit($NOFILE);
- } else {
- chomp($soft = `sh -c 'ulimit -n'`);
- }
+ my $soft = PublicInbox::Search::ulimit_n;
if (defined($soft)) {
# $want is an estimate
my $want = scalar(@{$self->{ibx_active}}) + 64;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e5c5d6ab..25ef49c5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -54,6 +54,9 @@ use constant {
#
# v1.6.0 adds BYTES, UID and THREADID values
SCHEMA_VERSION => 15,
+
+ # we may have up to 8 FDs per shard (depends on Xapian *shrug*)
+ SHARD_COST => 8,
};
use PublicInbox::Smsg;
@@ -729,4 +732,17 @@ sub get_doc ($$) {
}
}
+# not sure where best to put this...
+sub ulimit_n () {
+ my $n;
+ if (eval { require BSD::Resource; 1 }) {
+ my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+ ($n, undef) = BSD::Resource::getrlimit($NOFILE);
+ } else {
+ require PublicInbox::Spawn;
+ $n = PublicInbox::Spawn::run_qx([qw(/bin/sh -c), 'ulimit -n']);
+ }
+ $n;
+}
+
1;
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index f1311bd4..db9e99ae 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -18,7 +18,7 @@ use POSIX qw(:signal_h);
use Fcntl qw(LOCK_UN LOCK_EX);
use Carp qw(croak);
my $X = \%PublicInbox::Search::X;
-our (%SRCH, %WORKERS, $nworker, $workerset, $in);
+our (%SRCH, %WORKERS, $nworker, $workerset, $in, $SHARD_NFD, $MY_FD_MAX);
our $stderr = \*STDERR;
sub cmd_test_inspect {
@@ -193,8 +193,14 @@ sub dispatch {
my $key = "-d\0".join("\0-d\0", @$dirs);
$key .= "\0".join("\0", map { ('-Q', $_) } @{$req->{Q}}) if $req->{Q};
my $new;
- $req->{srch} = $SRCH{$key} //= do {
+ $req->{srch} = $SRCH{$key} // do {
$new = { qp_flags => $PublicInbox::Search::QP_FLAGS };
+ my $nfd = scalar(@$dirs) * PublicInbox::Search::SHARD_COST;
+ $SHARD_NFD += $nfd;
+ if ($SHARD_NFD > $MY_FD_MAX) {
+ $SHARD_NFD = $nfd;
+ %SRCH = ();
+ }
my $first = shift @$dirs;
my $slow_phrase = -f "$first/iamchert";
$new->{xdb} = $X->{Database}->new($first);
@@ -207,7 +213,7 @@ sub dispatch {
bless $new, $req->{c} ? 'PublicInbox::CodeSearch' :
'PublicInbox::Search';
$new->{qp} = $new->qparse_new;
- $new;
+ $SRCH{$key} = $new;
};
$req->{srch}->{xdb}->reopen unless $new;
$req->{Q} && !$req->{srch}->{qp_extra_done} and
@@ -305,7 +311,7 @@ sub start (@) {
my $c = getsockopt(local $in = \*STDIN, SOL_SOCKET, SO_TYPE);
unpack('i', $c) == SOCK_SEQPACKET or die 'stdin is not SOCK_SEQPACKET';
- local (%SRCH, %WORKERS);
+ local (%SRCH, %WORKERS, $SHARD_NFD, $MY_FD_MAX);
PublicInbox::Search::load_xapian();
$GLP->getoptionsfromarray(\@argv, my $opt = { j => 1 }, 'j=i') or
die 'bad args';
@@ -314,6 +320,10 @@ sub start (@) {
for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) {
$workerset->delset($_) or die "delset($_): $!";
}
+ $MY_FD_MAX = PublicInbox::Search::ulimit_n //
+ die "E: unable to get RLIMIT_NOFILE: $!";
+ warn "W: RLIMIT_NOFILE=$MY_FD_MAX too low\n" if $MY_FD_MAX < 72;
+ $MY_FD_MAX -= 64;
local $nworker = $opt->{j};
return recv_loop() if $nworker == 0;
diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm
index 74852ad1..922bd583 100644
--- a/lib/PublicInbox/XapHelperCxx.pm
+++ b/lib/PublicInbox/XapHelperCxx.pm
@@ -34,6 +34,7 @@ my $ldflags = '-Wl,-O1';
$ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd';
my $xflags = ($ENV{CXXFLAGS} // '-Wall -ggdb3 -pipe') . ' ' .
' -DTHREADID=' . PublicInbox::Search::THREADID .
+ ' -DSHARD_COST=' . PublicInbox::Search::SHARD_COST .
' -DXH_SPEC="'.join('',
map { s/=.*/:/; $_ } @PublicInbox::Search::XH_SPEC) . '" ' .
($ENV{LDFLAGS} // $ldflags);
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 44e0d63e..c71ac06d 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -140,6 +140,7 @@ static int srch_eq(const struct srch *a, const struct srch *b)
KHASHL_CSET_INIT(KH_LOCAL, srch_set, srch_set, struct srch *,
srch_hash, srch_eq)
static srch_set *srch_cache;
+static long my_fd_max, shard_nfd;
// sock_fd is modified in signal handler, yes, it's SOCK_SEQPACKET
static volatile int sock_fd = STDIN_FILENO;
static sigset_t fullset, workerset;
@@ -552,6 +553,34 @@ static bool is_chert(const char *dir)
return false;
}
+static void srch_free(struct srch *srch)
+{
+ delete srch->qp;
+ delete srch->db;
+ free(srch);
+}
+
+static void srch_cache_renew(struct srch *keep)
+{
+ khint_t k;
+
+ // can't delete while iterating, so just free each + clear
+ for (k = kh_begin(srch_cache); k != kh_end(srch_cache); k++) {
+ if (!kh_exist(srch_cache, k)) continue;
+ struct srch *cur = kh_key(srch_cache, k);
+
+ if (cur != keep)
+ srch_free(cur);
+ }
+ srch_set_cs_clear(srch_cache);
+ if (keep) {
+ int absent;
+ k = srch_set_put(srch_cache, keep, &absent);
+ assert(absent);
+ assert(k < kh_end(srch_cache));
+ }
+}
+
static bool srch_init(struct req *req)
{
int i;
@@ -563,6 +592,13 @@ static bool srch_init(struct req *req)
Xapian::QueryParser::FLAG_WILDCARD;
if (is_chert(req->dirv[0]))
srch->qp_flags &= ~FLAG_PHRASE;
+ long nfd = req->dirc * SHARD_COST;
+
+ shard_nfd += nfd;
+ if (shard_nfd > my_fd_max) {
+ srch_cache_renew(srch);
+ shard_nfd = nfd;
+ }
try {
srch->db = new Xapian::Database(req->dirv[0]);
} catch (...) {
@@ -629,13 +665,6 @@ static void srch_init_extra(struct req *req)
req->srch->qp_extra_done = true;
}
-static void srch_free(struct srch *srch)
-{
- delete srch->qp;
- delete srch->db;
- free(srch);
-}
-
static void dispatch(struct req *req)
{
int c;
@@ -900,12 +929,7 @@ static void cleanup_all(void)
cleanup_pids();
if (!srch_cache)
return;
-
- khint_t k;
- for (k = kh_begin(srch_cache); k != kh_end(srch_cache); k++) {
- if (kh_exist(srch_cache, k))
- srch_free(kh_key(srch_cache, k));
- }
+ srch_cache_renew(NULL);
srch_set_destroy(srch_cache);
srch_cache = NULL;
}
@@ -1041,12 +1065,20 @@ int main(int argc, char *argv[])
socklen_t slen = (socklen_t)sizeof(c);
stdout_path = getenv("STDOUT_PATH");
stderr_path = getenv("STDERR_PATH");
+ struct rlimit rl;
if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen))
err(EXIT_FAILURE, "getsockopt");
if (c != SOCK_SEQPACKET)
errx(EXIT_FAILURE, "stdin is not SOCK_SEQPACKET");
+ if (getrlimit(RLIMIT_NOFILE, &rl))
+ err(EXIT_FAILURE, "getrlimit");
+ my_fd_max = rl.rlim_cur;
+ if (my_fd_max < 72)
+ warnx("W: RLIMIT_NOFILE=%ld too low\n", my_fd_max);
+ my_fd_max -= 64;
+
mail_nrp_init();
code_nrp_init();
srch_cache = srch_set_init();
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 78be8539..b0fa75a2 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -284,4 +284,27 @@ for my $n (@NO_CXX) {
}
}
+SKIP: {
+ my $nr = $ENV{TEST_XH_FDMAX} or
+ skip 'TEST_XH_FDMAX unset', 1;
+ my @xhc = map {
+ local $ENV{PI_NO_CXX} = $_;
+ PublicInbox::XapClient::start_helper('-j0');
+ } @NO_CXX;
+ my $n = 1;
+ my $exp;
+ for (0..(PublicInbox::Search::ulimit_n() * $nr)) {
+ for my $xhc (@xhc) {
+ my $r = $xhc->mkreq([], qw(mset -Q), "tst$n=XTST$n",
+ @ibx_shard_args, qw(rt:0..));
+ chomp(my @res = readline($r));
+ $exp //= $res[0];
+ $exp eq $res[0] or
+ is $exp, $res[0], "mset mismatch on n=$n";
+ ++$n;
+ }
+ }
+ ok $exp, "got expected entries ($n)";
+}
+
done_testing;
^ permalink raw reply related [flat|nested] 11+ messages in thread