From 06bdb8c472a967b5e64714dc06534147d371a603 Mon Sep 17 00:00:00 2001
From: David Bremner
Date: Tue, 25 Apr 2023 09:45:02 -0300
Subject: [PATCH] start program to do a census of ghost messages

this should check term frequency for references
---
 ggc.cc | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 ggc.cc

diff --git a/ggc.cc b/ggc.cc
new file mode 100644
index 00000000..8b558567
--- /dev/null
+++ b/ggc.cc
@@ -0,0 +1,51 @@
+#include <xapian.h>
+
+#include <iostream>
+#include <string>
+
+// Census of ghost messages: list every document matching the term
+// "Tghost" in the given Xapian database, printing its docid and the
+// remainder of its first "Q"-prefixed term (labelled "mid").
+//
+// Exit status is 0 on success, 1 on a usage error or Xapian failure.
+int main (int argc, char **argv)
+{
+    if (argc != 2) {
+        std::cerr << "usage: ggc xapian-database\n";
+        return 1;
+    }
+
+    try {
+        Xapian::Database db (argv[1]);
+        Xapian::Enquire enquire (db);
+
+        enquire.set_query (Xapian::Query ("Tghost"));
+
+        // Request up to get_doccount () matches so no match is cut off.
+        const auto mset = enquire.get_mset (0, db.get_doccount ());
+
+        for (auto iter = mset.begin (); iter != mset.end (); ++iter) {
+            const auto doc = iter.get_document ();
+            auto term_iter = doc.termlist_begin ();
+
+            // skip_to () stops at the first term >= "Q", which may be the
+            // end of the list or a term with another prefix; verify it
+            // before dereferencing.
+            term_iter.skip_to ("Q");
+            const std::string term =
+                term_iter != doc.termlist_end () ? *term_iter : std::string ();
+            if (term.empty () || term[0] != 'Q') {
+                std::cerr << "docid = " << *iter << " has no Q term\n";
+                continue;
+            }
+
+            std::cout << "docid = " << *iter;
+            std::cout << " mid " << term.substr (1) << '\n';
+        }
+    } catch (const Xapian::Error &error) {
+        std::cerr << "ggc: " << error.get_description () << '\n';
+        return 1;
+    }
+
+    return 0;
+}
-- 
git format-patch -1 --stdout -C 06bdb8c472a967b5e64714dc06534147d371a603