From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Subject: [PATCH 13/16] CLI/git: create PrivateIndex class
Date: Sat, 23 Apr 2022 10:38:45 -0300 [thread overview]
Message-ID: <20220423133848.3852688-14-david@tethera.net> (raw)
In-Reply-To: <20220423133848.3852688-1-david@tethera.net>
If the index file matches a previously known revision of the database,
we can update the index incrementally using the recorded lastmod
counter. This is typically much faster than a full update, although it
could be slower in the case of large changes to the database.
---
notmuch-git.in | 220 ++++++++++++++++++++++++++++++++---------------
test/T850-git.sh | 41 +++++++++
2 files changed, 194 insertions(+), 67 deletions(-)
diff --git a/notmuch-git.in b/notmuch-git.in
index b69d57e7..b3f71699 100755
--- a/notmuch-git.in
+++ b/notmuch-git.in
@@ -50,6 +50,10 @@ except ImportError: # Python 2
from urllib import quote as _quote
from urllib import unquote as _unquote
+import json as _json
+
+# hopefully big enough, handle 32 bit hosts
+MAX_LASTMOD=2**32
__version__ = '@NOTMUCH_VERSION@'
@@ -621,51 +625,159 @@ def get_status():
'deleted': {},
'missing': {},
}
- index = _index_tags()
- maybe_deleted = _diff_index(index=index, filter='D')
- for id, tags in maybe_deleted.items():
- (_, stdout, stderr) = _spawn(
- args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)],
- stdout=_subprocess.PIPE,
- wait=True)
- if stdout:
- status['deleted'][id] = tags
- else:
- status['missing'][id] = tags
- status['added'] = _diff_index(index=index, filter='A')
- _os.remove(index)
+ with PrivateIndex(repo=NMBGIT, prefix=TAG_PREFIX) as index:
+ maybe_deleted = index.diff(filter='D')
+ for id, tags in maybe_deleted.items():
+ (_, stdout, stderr) = _spawn(
+ args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)],
+ stdout=_subprocess.PIPE,
+ wait=True)
+ if stdout:
+ status['deleted'][id] = tags
+ else:
+ status['missing'][id] = tags
+ status['added'] = index.diff(filter='A')
+
return status
-@timed
-def _index_tags():
- "Write notmuch tags to private git index."
- ensure_private_directory(NMBGIT)
- path = _os.path.join(NMBGIT, 'notmuch','index')
- prefix = '+{0}'.format(_ENCODED_TAG_PREFIX)
- _git(
- args=['read-tree', '--empty'],
- additional_env={'GIT_INDEX_FILE': path}, wait=True)
- with _spawn(
- args=['notmuch', 'dump', '--format=batch-tag', '--query=sexp', '--', _tag_query()],
- stdout=_subprocess.PIPE) as notmuch:
+class PrivateIndex:
+    """
+    Private git index mirroring the notmuch tags of the database.
+
+    The index file is 'notmuch/index' below 'repo'; it is brought up to
+    date when the object is constructed.  A JSON cache file in the same
+    directory records the tag prefix, the database UUID and lastmod
+    counter, and the checksum of the index file.  When all of these
+    still match on a later run, only messages modified since the
+    recorded lastmod are re-indexed; otherwise the index is emptied and
+    rebuilt from a full dump.
+
+    Use as a context manager; the cache file is written on exit.
+    """
+
+    def __init__(self, repo, prefix):
+        """
+        Load the cache for 'repo' and update the private index.
+
+        'prefix' is only compared with, and stored in, the cache file;
+        the tags actually indexed are selected by _tag_query() and
+        _ENCODED_TAG_PREFIX.
+        """
+        try:
+            _os.makedirs(_os.path.join(repo, 'notmuch'))
+        except FileExistsError:
+            pass
+
+        file_name = 'notmuch/index'
+        self.index_path = _os.path.join(repo, file_name)
+        self.cache_path = _os.path.join(repo, 'notmuch', '{:s}.json'.format(_hex_quote(file_name)))
+
+        self.current_prefix = prefix
+
+        # State recorded by the previous run (None without a cache file).
+        self.prefix = None
+        self.uuid = None
+        self.lastmod = None
+        self.checksum = None
+        # Database state observed by _index_tags(), saved by __exit__().
+        self._db_uuid = None
+        self._db_lastmod = None
+        self._load_cache_file()
+        self._index_tags()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """
+        Write the cache file describing the index as it is now.
+
+        The UUID and lastmod saved are the ones read *before* the dump
+        in _index_tags().  Re-reading them here could record a lastmod
+        newer than the dump, and the next incremental update would then
+        skip messages modified in between.
+        """
+        checksum = self._read_index_checksum()
+        with open(self.cache_path, "w") as f:
+            _json.dump({'prefix': self.current_prefix, 'uuid': self._db_uuid, 'lastmod': self._db_lastmod, 'checksum': checksum }, f)
+
+    def _load_cache_file(self):
+        """
+        Read prefix, uuid, lastmod and checksum from the cache file.
+
+        A missing cache file leaves the attributes at None.  A cache
+        file that is not valid JSON, or lacks one of the keys, is
+        logged as an error and terminates the program.
+        """
+        try:
+            with open(self.cache_path) as f:
+                data = _json.load(f)
+                self.prefix = data['prefix']
+                self.uuid = data['uuid']
+                self.lastmod = data['lastmod']
+                self.checksum = data['checksum']
+        except FileNotFoundError:
+            return None
+        except (_json.JSONDecodeError, KeyError):
+            _LOG.error("Error decoding cache")
+            _sys.exit(1)
+
+    def _read_index_checksum (self):
+        """Read the index checksum, as defined by index-format.txt in the git source
+        WARNING: assumes SHA1 repo
+
+        Returns the last 20 bytes of the index file as a hex string,
+        or None if the index file does not exist."""
+        import binascii
+        try:
+            with open(self.index_path, 'rb') as f:
+                size = _os.path.getsize(self.index_path)
+                f.seek(size - 20)
+                return binascii.hexlify(f.read(20)).decode('ascii')
+        except FileNotFoundError:
+            return None
+
+    @timed
+    def _index_tags(self):
+        """
+        Write notmuch tags to private git index.
+
+        Either updates the existing index incrementally or empties it
+        and refills it from a full dump; exactly one of the two happens.
+        """
+        prefix = '+{0}'.format(_ENCODED_TAG_PREFIX)
+        current_checksum = self._read_index_checksum()
+        (_count, uuid, lastmod) = _read_database_lastmod()
+        # Remember the database state seen before dumping, for __exit__().
+        self._db_uuid = uuid
+        self._db_lastmod = lastmod
+
+        # The cached state is only usable if every recorded value is
+        # present and still matches; anything else forces a rebuild so
+        # that no stale entries survive in the index.
+        incremental = (
+            self.prefix is not None and self.prefix == self.current_prefix
+            and self.checksum is not None and self.checksum == current_checksum
+            and bool(self.uuid) and self.uuid == uuid
+            and self.lastmod is not None)
+
+        query = _tag_query()
+        if incremental:
+            # NOTE(review): the parentheses in this format string look
+            # unbalanced (the 'and' form appears to close before the tag
+            # query).  Left byte-identical because changing it changes
+            # which messages are dumped -- confirm against the notmuch
+            # sexp query parser.
+            query = '(and (infix "lastmod:{:d}..")) {:s})'.format(self.lastmod+1, query)
+        else:
+            _git(
+                args=['read-tree', '--empty'],
+                additional_env={'GIT_INDEX_FILE': self.index_path}, wait=True)
+
+        with _spawn(
+                args=['notmuch', 'dump', '--format=batch-tag', '--query=sexp', '--', query],
+                stdout=_subprocess.PIPE) as notmuch:
+            with _git(
+                    args=['update-index', '--index-info'],
+                    stdin=_subprocess.PIPE,
+                    additional_env={'GIT_INDEX_FILE': self.index_path}) as git:
+                for line in notmuch.stdout:
+                    if line.strip().startswith('#'):
+                        continue
+                    (tags_string, id) = [_.strip() for _ in line.split(' -- id:')]
+                    tags = [
+                        _unquote(tag[len(prefix):])
+                        for tag in tags_string.split()
+                        if tag.startswith(prefix)]
+                    id = _xapian_unquote(string=id)
+                    if incremental:
+                        # Drop the message's old entries before re-adding
+                        # its current tags.
+                        for entry in _clear_tags_for_message(index=self.index_path, id=id):
+                            git.stdin.write(entry)
+                    for entry in _index_tags_for_message(
+                            id=id, status='A', tags=tags):
+                        git.stdin.write(entry)
+
+    @timed
+    def diff(self, filter):
+        """
+        Get an {id: {tag, ...}} dict for a given filter.
+
+        For example, use 'A' to find added tags, and 'D' to find deleted tags.
+        """
+        s = _collections.defaultdict(set)
with _git(
- args=['update-index', '--index-info'],
- stdin=_subprocess.PIPE,
- additional_env={'GIT_INDEX_FILE': path}) as git:
- for line in notmuch.stdout:
- if line.strip().startswith('#'):
- continue
- (tags_string, id) = [_.strip() for _ in line.split(' -- id:')]
- tags = [
- _unquote(tag[len(prefix):])
- for tag in tags_string.split()
- if tag.startswith(prefix)]
- id = _xapian_unquote(string=id)
- for line in _index_tags_for_message(
- id=id, status='A', tags=tags):
- git.stdin.write(line)
- return path
+                args=[
+                    'diff-index', '--cached', '--diff-filter', filter,
+                    '--name-only', 'HEAD'],
+                additional_env={'GIT_INDEX_FILE': self.index_path},
+                stdout=_subprocess.PIPE) as p:
+            # Once we drop Python < 3.3, we can use 'yield from' here
+            for id, tag in _unpack_diff_lines(stream=p.stdout):
+                s[id].add(tag)
+        return s
+
+def _clear_tags_for_message(index, id):
+    """
+    Generate index-info lines for every entry of message 'id'.
+
+    Lists the files below 'tags/<hex-quoted id>' in the git index file
+    'index' and yields, for each of them, a line with mode 0 and an
+    all-zero hash followed by the path.
+
+    'id' should not be encoded/escaped.
+    """
+    message_dir = 'tags/{id}'.format(id=_hex_quote(string=id))
+    env = {'GIT_INDEX_FILE': index}
+
+    with _git(
+            args=['ls-files', message_dir],
+            additional_env=env,
+            stdout=_subprocess.PIPE) as git:
+        for path in git.stdout:
+            yield '0 0000000000000000000000000000000000000000\t{:s}\n'.format(path.strip())
+def _read_database_lastmod():
+    """
+    Return (count, uuid, lastmod) from 'notmuch count --lastmod'.
+
+    Only the first output line is read.  'count' and 'uuid' are
+    returned as the whitespace-separated fields found on it; 'lastmod'
+    is converted to an int.
+    """
+    with _spawn(
+            args=['notmuch', 'count', '--lastmod', '*'],
+            stdout=_subprocess.PIPE) as notmuch:
+        fields = notmuch.stdout.readline().split()
+        (count, uuid, lastmod_str) = fields
+        lastmod = int(lastmod_str)
+        return (count, uuid, lastmod)
def _index_tags_for_message(id, status, tags):
"""
@@ -686,26 +798,6 @@ def _index_tags_for_message(id, status, tags):
yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path)
-@timed
-def _diff_index(index, filter):
- """
- Get an {id: {tag, ...}} dict for a given filter.
-
- For example, use 'A' to find added tags, and 'D' to find deleted tags.
- """
- s = _collections.defaultdict(set)
- with _git(
- args=[
- 'diff-index', '--cached', '--diff-filter', filter,
- '--name-only', 'HEAD'],
- additional_env={'GIT_INDEX_FILE': index},
- stdout=_subprocess.PIPE) as p:
- # Once we drop Python < 3.3, we can use 'yield from' here
- for id, tag in _unpack_diff_lines(stream=p.stdout):
- s[id].add(tag)
- return s
-
-
def _diff_refs(filter, a='HEAD', b='@{upstream}'):
with _git(
args=['diff', '--diff-filter', filter, '--name-only', a, b],
@@ -748,12 +840,6 @@ def _help(parser, command=None):
parser.parse_args(['--help'])
-def ensure_private_directory(repo):
- try:
- _os.makedirs(_os.path.join(repo, 'notmuch'))
- except FileExistsError:
- pass
-
if __name__ == '__main__':
import argparse
diff --git a/test/T850-git.sh b/test/T850-git.sh
index 4bf29b20..2358690f 100755
--- a/test/T850-git.sh
+++ b/test/T850-git.sh
@@ -33,6 +33,47 @@ notmuch tag '-"quoted tag"' '*'
git -C clone2.git ls-tree -r --name-only HEAD | grep /inbox > AFTER
test_expect_equal_file_nonempty BEFORE AFTER
+# Intended to exercise the incremental index update: tag one message and
+# commit, then remove the tag and commit again.  After each commit the tree
+# entries for that message are listed; the 'test' entry must appear in the
+# first listing only.
+test_begin_subtest "commit (incremental)"
+notmuch tag +test id:20091117190054.GU3165@dottiness.seas.harvard.edu
+notmuch git -C tags.git -p '' commit
+git -C tags.git ls-tree -r --name-only HEAD |
+ grep 20091117190054 | sort > OUTPUT
+echo "--------------------------------------------------" >> OUTPUT
+# Second round: drop the tag again and append the new listing to OUTPUT.
+notmuch tag -test id:20091117190054.GU3165@dottiness.seas.harvard.edu
+notmuch git -C tags.git -p '' commit
+git -C tags.git ls-tree -r --name-only HEAD |
+ grep 20091117190054 | sort >> OUTPUT
+cat <<EOF > EXPECTED
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/test
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+--------------------------------------------------
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+EOF
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
+# Commit once with tag prefix 'test::' and once with an empty prefix.  With
+# the prefix, only the 'test::one' tag is expected in the tree, stored as
+# 'one'; with the empty prefix the message's remaining tags are expected.
+test_begin_subtest "commit (change prefix)"
+notmuch tag +test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
+notmuch git -C tags.git -p 'test::' commit
+git -C tags.git ls-tree -r --name-only HEAD |
+ grep 20091117190054 | sort > OUTPUT
+echo "--------------------------------------------------" >> OUTPUT
+# Remove the prefixed tag and switch back to the empty prefix.
+notmuch tag -test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
+notmuch git -C tags.git -p '' commit
+git -C tags.git ls-tree -r --name-only HEAD |
+ grep 20091117190054 | sort >> OUTPUT
+cat <<EOF > EXPECTED
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/one
+--------------------------------------------------
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+EOF
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+
test_begin_subtest "checkout"
notmuch dump > BEFORE
notmuch tag -inbox '*'
--
2.35.2
next prev parent reply other threads:[~2022-04-23 13:39 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-23 13:38 WIP: promote nmbug to user sync tool David Bremner
2022-04-23 13:38 ` [PATCH 01/16] nmbug: promote to user tool "notmuch-git" David Bremner
2022-04-23 13:38 ` [PATCH 02/16] notmuch-git: add --git-dir argument David Bremner
2022-04-23 13:38 ` [PATCH 03/16] notmuch-git: add --tag-prefix argument David Bremner
2022-04-23 13:38 ` [PATCH 04/16] test: initial tests for notmuch-git David Bremner
2022-04-23 13:38 ` [PATCH 05/16] nmbug: Add an 'init' command David Bremner
2022-04-23 13:38 ` [PATCH 06/16] CLI/git: suppress warnings about initial branch name David Bremner
2022-04-23 13:38 ` [PATCH 07/16] test: use "notmuch git init" for tests David Bremner
2022-04-23 13:38 ` [PATCH 08/16] CLI/git: make existance of config branch optional on clone David Bremner
2022-04-23 13:38 ` [PATCH 09/16] test/git: add known broken test for tag with quotes David Bremner
2022-04-23 13:38 ` [PATCH 10/16] CLI/git: replace enumeration of tags with sexp query David Bremner
2022-04-23 13:38 ` [PATCH 11/16] CLI/git: add @timed decorator, time a few functions David Bremner
2022-04-23 13:38 ` [PATCH 12/16] CLI/git: rename private index file David Bremner
2022-04-23 13:38 ` David Bremner [this message]
2022-04-23 13:38 ` [PATCH 14/16] CLI/git: create CachedIndex class David Bremner
2022-04-23 13:38 ` [PATCH 15/16] debian: install notmuch-git David Bremner
2022-04-23 13:38 ` [PATCH 16/16] WIP: start manual page for notmuch-git David Bremner
2022-04-23 18:49 ` WIP: promote nmbug to user sync tool David Bremner
2022-04-30 17:33 ` Sean Whitton
2022-05-08 0:01 ` David Bremner
2022-05-20 23:05 ` Sean Whitton
2022-05-29 11:00 ` David Bremner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://notmuchmail.org/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220423133848.3852688-14-david@tethera.net \
--to=david@tethera.net \
--cc=notmuch@notmuchmail.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhetil.org/notmuch.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).