From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Subject: [PATCH 14/16] CLI/git: create CachedIndex class
Date: Sat, 23 Apr 2022 10:38:46 -0300 [thread overview]
Message-ID: <20220423133848.3852688-15-david@tethera.net> (raw)
In-Reply-To: <20220423133848.3852688-1-david@tethera.net>
Running "git read-tree HEAD" is a bottleneck, but unfortunately it is
sometimes needed. Cache the index checksum and the hash of the treeish
to reduce the number of times the operation is run. The overall design
is a simplified version of the PrivateIndex class, which is partially
refactored to support the new class.
---
notmuch-git.in | 136 +++++++++++++++++++++++++++++++++++--------------
1 file changed, 97 insertions(+), 39 deletions(-)
diff --git a/notmuch-git.in b/notmuch-git.in
index b3f71699..261b3f85 100755
--- a/notmuch-git.in
+++ b/notmuch-git.in
@@ -342,41 +342,98 @@ def _is_committed(status):
return len(status['added']) + len(status['deleted']) == 0
+class CachedIndex:
+    """Context manager that avoids redundant 'git read-tree' calls.
+
+    On entry the index is only re-read from the treeish when the cache
+    file does not vouch for it: the treeish name, the hash it resolves
+    to and the checksum of the index file must all match the values
+    saved by a previous run. On exit the current values are saved.
+    """
+
+    def __init__(self, repo, treeish):
+        """Set up paths below 'repo' and load any saved cache values.
+
+        repo: directory containing the 'index' file and the 'notmuch'
+              subdirectory where the cache file is kept.
+        treeish: the treeish the index is expected to reflect.
+        """
+        self.cache_path = _os.path.join(repo, 'notmuch', 'index_cache.json')
+        self.index_path = _os.path.join(repo, 'index')
+        self.current_treeish = treeish
+        # cached values
+        self.treeish = None
+        self.hash = None
+        self.index_checksum = None
+
+        self._load_cache_file()
+
+    def _load_cache_file(self):
+        """Load the cached values; a missing file leaves them as None.
+
+        Exits with status 1 if the file exists but cannot be decoded or
+        does not contain the expected keys.
+        """
+        try:
+            with open(self.cache_path) as f:
+                data = _json.load(f)
+            # Look up every key before assigning anything, so a bad
+            # file cannot leave the object half initialized.
+            cached_treeish = data['treeish']
+            cached_hash = data['hash']
+            cached_checksum = data['index_checksum']
+        except FileNotFoundError:
+            return
+        except (_json.JSONDecodeError, KeyError, TypeError):
+            # KeyError/TypeError: valid JSON, but not the expected mapping.
+            _LOG.error("Error decoding cache")
+            _sys.exit(1)
+
+        self.treeish = cached_treeish
+        self.hash = cached_hash
+        self.index_checksum = cached_checksum
+
+    def _rev_parse(self):
+        """Return what 'git rev-parse' prints for the current treeish."""
+        (_, output, _) = _git(
+            args=['rev-parse', self.current_treeish],
+            stdout=_subprocess.PIPE,
+            wait=True)
+        return output.rstrip()
+
+    def __enter__(self):
+        self.read_tree()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Save the current treeish, its hash and the index checksum."""
+        checksum = _read_index_checksum(self.index_path)
+        # Resolve the hash before opening the cache file for writing, so
+        # a failing git call cannot leave a truncated cache behind.
+        current_hash = self._rev_parse()
+
+        with open(self.cache_path, "w") as f:
+            _json.dump({'treeish': self.current_treeish,
+                        'hash': current_hash,
+                        'index_checksum': checksum}, f)
+
+    @timed
+    def read_tree(self):
+        """Run 'git read-tree' unless the cached values show it is not needed."""
+        current_checksum = _read_index_checksum(self.index_path)
+        current_hash = self._rev_parse()
+
+        if self.current_treeish == self.treeish and \
+           self.index_checksum and self.index_checksum == current_checksum and \
+           self.hash and self.hash == current_hash:
+            return
+
+        _git(args=['read-tree', self.current_treeish], wait=True)
+
def commit(treeish='HEAD', message=None):
"""
Commit prefix-matching tags from the notmuch database to Git.
"""
+
status = get_status()
if _is_committed(status=status):
_LOG.warning('Nothing to commit')
return
- _git(args=['read-tree', '--empty'], wait=True)
- _git(args=['read-tree', treeish], wait=True)
- try:
- _update_index(status=status)
- (_, tree, _) = _git(
- args=['write-tree'],
- stdout=_subprocess.PIPE,
- wait=True)
- (_, parent, _) = _git(
- args=['rev-parse', treeish],
- stdout=_subprocess.PIPE,
- wait=True)
- (_, commit, _) = _git(
- args=['commit-tree', tree.strip(), '-p', parent.strip()],
- input=message,
- stdout=_subprocess.PIPE,
- wait=True)
- _git(
- args=['update-ref', treeish, commit.strip()],
- stdout=_subprocess.PIPE,
- wait=True)
- except Exception as e:
- _git(args=['read-tree', '--empty'], wait=True)
- _git(args=['read-tree', treeish], wait=True)
- raise
+ # On entry CachedIndex runs 'git read-tree' only if its cached values do
+ # not match; on exit it saves the index checksum and the treeish hash.
+ with CachedIndex(NMBGIT, treeish) as index:
+ try:
+ _update_index(status=status)
+ (_, tree, _) = _git(
+ args=['write-tree'],
+ stdout=_subprocess.PIPE,
+ wait=True)
+ (_, parent, _) = _git(
+ args=['rev-parse', treeish],
+ stdout=_subprocess.PIPE,
+ wait=True)
+ (_, commit, _) = _git(
+ args=['commit-tree', tree.strip(), '-p', parent.strip()],
+ input=message,
+ stdout=_subprocess.PIPE,
+ wait=True)
+ _git(
+ args=['update-ref', treeish, commit.strip()],
+ stdout=_subprocess.PIPE,
+ wait=True)
+ except Exception as e:
+ # Re-read the index from treeish before re-raising, so the
+ # failed update does not stay in the index.
+ _git(args=['read-tree', '--empty'], wait=True)
+ _git(args=['read-tree', treeish], wait=True)
+ raise
@timed
def _update_index(status):
@@ -664,7 +721,7 @@ class PrivateIndex:
return self
def __exit__(self, type, value, traceback):
- checksum = self._read_index_checksum()
+ checksum = _read_index_checksum(self.index_path)
(count, uuid, lastmod) = _read_database_lastmod()
with open(self.cache_path, "w") as f:
_json.dump({'prefix': self.current_prefix, 'uuid': uuid, 'lastmod': lastmod, 'checksum': checksum }, f)
@@ -683,23 +740,11 @@ class PrivateIndex:
_LOG.error("Error decoding cache")
_sys.exit(1)
- def _read_index_checksum (self):
- """Read the index checksum, as defined by index-format.txt in the git source
- WARNING: assumes SHA1 repo"""
- import binascii
- try:
- with open(self.index_path, 'rb') as f:
- size=_os.path.getsize(self.index_path)
- f.seek(size-20);
- return binascii.hexlify(f.read(20)).decode('ascii')
- except FileNotFoundError:
- return None
-
@timed
def _index_tags(self):
"Write notmuch tags to private git index."
prefix = '+{0}'.format(_ENCODED_TAG_PREFIX)
- current_checksum = self._read_index_checksum()
+ current_checksum = _read_index_checksum(self.index_path)
if (self.prefix == None or self.prefix != self.current_prefix
or self.checksum == None or self.checksum != current_checksum):
_git(
@@ -755,6 +800,19 @@ class PrivateIndex:
s[id].add(tag)
return s
+def _read_index_checksum(index_path):
+    """Return the checksum stored at the end of a git index file.
+
+    The checksum is the final 20 bytes of the file, as defined by
+    index-format.txt in the git source, returned as a hex string.
+    Returns None if the file does not exist or is too short to hold
+    a checksum.
+
+    WARNING: assumes SHA1 repo
+    """
+    import binascii
+    checksum_size = 20
+    try:
+        with open(index_path, 'rb') as f:
+            # Measure through the open handle so the size and the data
+            # read below come from the same file.
+            size = f.seek(0, _os.SEEK_END)
+            if size < checksum_size:
+                # Seeking to a negative offset would raise; report
+                # "no checksum" instead.
+                return None
+            f.seek(size - checksum_size)
+            return binascii.hexlify(f.read(checksum_size)).decode('ascii')
+    except FileNotFoundError:
+        return None
+
+
def _clear_tags_for_message(index, id):
"""
Clear any existing index entries for message 'id'
--
2.35.2
next prev parent reply other threads:[~2022-04-23 13:40 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-23 13:38 WIP: promote nmbug to user sync tool David Bremner
2022-04-23 13:38 ` [PATCH 01/16] nmbug: promote to user tool "notmuch-git" David Bremner
2022-04-23 13:38 ` [PATCH 02/16] notmuch-git: add --git-dir argument David Bremner
2022-04-23 13:38 ` [PATCH 03/16] notmuch-git: add --tag-prefix argument David Bremner
2022-04-23 13:38 ` [PATCH 04/16] test: initial tests for notmuch-git David Bremner
2022-04-23 13:38 ` [PATCH 05/16] nmbug: Add an 'init' command David Bremner
2022-04-23 13:38 ` [PATCH 06/16] CLI/git: suppress warnings about initial branch name David Bremner
2022-04-23 13:38 ` [PATCH 07/16] test: use "notmuch git init" for tests David Bremner
2022-04-23 13:38 ` [PATCH 08/16] CLI/git: make existance of config branch optional on clone David Bremner
2022-04-23 13:38 ` [PATCH 09/16] test/git: add known broken test for tag with quotes David Bremner
2022-04-23 13:38 ` [PATCH 10/16] CLI/git: replace enumeration of tags with sexp query David Bremner
2022-04-23 13:38 ` [PATCH 11/16] CLI/git: add @timed decorator, time a few functions David Bremner
2022-04-23 13:38 ` [PATCH 12/16] CLI/git: rename private index file David Bremner
2022-04-23 13:38 ` [PATCH 13/16] CLI/git: create PrivateIndex class David Bremner
2022-04-23 13:38 ` David Bremner [this message]
2022-04-23 13:38 ` [PATCH 15/16] debian: install notmuch-git David Bremner
2022-04-23 13:38 ` [PATCH 16/16] WIP: start manual page for notmuch-git David Bremner
2022-04-23 18:49 ` WIP: promote nmbug to user sync tool David Bremner
2022-04-30 17:33 ` Sean Whitton
2022-05-08 0:01 ` David Bremner
2022-05-20 23:05 ` Sean Whitton
2022-05-29 11:00 ` David Bremner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://notmuchmail.org/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220423133848.3852688-15-david@tethera.net \
--to=david@tethera.net \
--cc=notmuch@notmuchmail.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhetil.org/notmuch.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).