* [PATCH] WIP/git: change internal directory layout
2022-06-18 21:47 RFC: directory layout for notmuch git David Bremner
@ 2022-06-18 21:47 ` David Bremner
0 siblings, 0 replies; 2+ messages in thread
From: David Bremner @ 2022-06-18 21:47 UTC (permalink / raw)
To: notmuch
Add 4 layers of hashed directories in order to prevent large numbers
of subdirectories in any one directory.
Currently there is no migration strategy for old-style notmuch-git/nmbug
repositories.
---
notmuch-git.py | 17 ++++++++++++-----
test/T850-git.sh | 48 ++++++++++++++++++++++++------------------------
test/test-lib.sh | 4 ++++
3 files changed, 40 insertions(+), 29 deletions(-)
diff --git a/notmuch-git.py b/notmuch-git.py
index f188660c..61c5fe29 100644
--- a/notmuch-git.py
+++ b/notmuch-git.py
@@ -49,7 +49,7 @@ TAG_PREFIX = None
_HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}')
_TAG_DIRECTORY = 'tags/'
-_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)')
+_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){4}(?P<id>[^/]*)/(?P<tag>[^/]*)')
# magic hash for Git (git hash-object -t blob /dev/null)
_EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
@@ -265,7 +265,7 @@ def archive(treeish='HEAD', args=()):
Each tag $tag for message with Message-Id $id is written to
an empty file
- tags/encode($id)/encode($tag)
+ tags/hash1(id)/hash2(id)/hash3(id)/hash4(id)/encode($id)/encode($tag)
The encoding preserves alphanumerics, and the characters
"+-_@=.:," (not the quotes). All other octets are replaced with
@@ -821,7 +821,7 @@ def _clear_tags_for_message(index, id):
Neither 'id' nor the tags in 'tags' should be encoded/escaped.
"""
- dir = 'tags/{id}'.format(id=_hex_quote(string=id))
+ dir = _id_path(id)
with _git(
args=['ls-files', dir],
@@ -838,6 +838,14 @@ def _read_database_lastmod():
(count,uuid,lastmod_str) = notmuch.stdout.readline().split()
return (count,uuid,int(lastmod_str))
+def _id_path(id):
+ from hashlib import blake2b
+ hid=_hex_quote(string=id)
+ idhash = blake2b(hid.encode('utf8'), digest_size=4).hexdigest()
+ return 'tags/{dir1}/{dir2}/{dir3}/{dir4}/{hid}'.format(hid=hid,
+ dir1=idhash[0:2],dir2=idhash[2:4],
+ dir3=idhash[4:6],dir4=idhash[6:])
+
def _index_tags_for_message(id, status, tags):
"""
Update the Git index to either create or delete an empty file.
@@ -852,8 +860,7 @@ def _index_tags_for_message(id, status, tags):
hash = '0000000000000000000000000000000000000000'
for tag in tags:
- path = 'tags/{id}/{tag}'.format(
- id=_hex_quote(string=id), tag=_hex_quote(string=tag))
+ path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag))
yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path)
diff --git a/test/T850-git.sh b/test/T850-git.sh
index 7ea50939..dfff2369 100755
--- a/test/T850-git.sh
+++ b/test/T850-git.sh
@@ -40,10 +40,10 @@ notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu
test_begin_subtest "committing new prefix works with force"
notmuch tag +new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -l debug -p 'new-prefix::' -C force-prefix.git commit --force
-git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT
+git -C force-prefix.git ls-tree -r --name-only HEAD | xargs dirname | notmuch_git_sanitize | sort -u > OUTPUT
notmuch tag -new-prefix::foo id:20091117190054.GU3165@dottiness.seas.harvard.edu
cat <<EOF>EXPECTED
-id:20091117190054.GU3165@dottiness.seas.harvard.edu
+20091117190054.GU3165@dottiness.seas.harvard.edu
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
@@ -62,8 +62,8 @@ test_expect_equal_file_nonempty EXPECTED OUTPUT
test_begin_subtest "commit"
notmuch git -C tags.git commit --force
-git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | sort -u | sed s,tags/,id:, > OUTPUT
-notmuch search --output=messages '*' | sort > EXPECTED
+git -C tags.git ls-tree -r --name-only HEAD | xargs dirname | notmuch_git_sanitize | sort -u > OUTPUT
+notmuch search --output=messages '*' | sed s/^id:// | sort > EXPECTED
test_expect_equal_file_nonempty EXPECTED OUTPUT
test_begin_subtest "commit --force succeeds"
@@ -88,22 +88,22 @@ test_expect_equal_file_nonempty BEFORE AFTER
test_begin_subtest "commit (incremental)"
notmuch tag +test id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort > OUTPUT
echo "--------------------------------------------------" >> OUTPUT
notmuch tag -test id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort >> OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/test
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/test
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
--------------------------------------------------
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
@@ -111,18 +111,18 @@ test_begin_subtest "commit (change prefix)"
notmuch tag +test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git -p 'test::' commit --force
git -C tags.git ls-tree -r --name-only HEAD |
- grep 20091117190054 | sort > OUTPUT
+ grep 20091117190054 | notmuch_git_sanitize | sort > OUTPUT
echo "--------------------------------------------------" >> OUTPUT
notmuch tag -test::one id:20091117190054.GU3165@dottiness.seas.harvard.edu
notmuch git -C tags.git commit --force
-git -C tags.git ls-tree -r --name-only HEAD |
+git -C tags.git ls-tree -r --name-only HEAD | notmuch_git_sanitize | \
grep 20091117190054 | sort >> OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/one
+20091117190054.GU3165@dottiness.seas.harvard.edu/one
--------------------------------------------------
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
test_expect_equal_file_nonempty EXPECTED OUTPUT
@@ -151,12 +151,12 @@ test_expect_equal_file_nonempty BEFORE AFTER
test_begin_subtest "archive"
notmuch git -C tags.git archive | tar tf - | \
- grep 20091117190054.GU3165@dottiness.seas.harvard.edu | sort > OUTPUT
+ grep 20091117190054.GU3165@dottiness.seas.harvard.edu | notmuch_git_sanitize | sort > OUTPUT
cat <<EOF > EXPECTED
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/signed
-tags/20091117190054.GU3165@dottiness.seas.harvard.edu/unread
+20091117190054.GU3165@dottiness.seas.harvard.edu/
+20091117190054.GU3165@dottiness.seas.harvard.edu/inbox
+20091117190054.GU3165@dottiness.seas.harvard.edu/signed
+20091117190054.GU3165@dottiness.seas.harvard.edu/unread
EOF
notmuch git -C tags.git checkout
test_expect_equal_file EXPECTED OUTPUT
diff --git a/test/test-lib.sh b/test/test-lib.sh
index 59b6079d..ad490293 100644
--- a/test/test-lib.sh
+++ b/test/test-lib.sh
@@ -545,6 +545,10 @@ notmuch_date_sanitize () {
-e 's/^Date: Fri, 05 Jan 2001 .*0000/Date: GENERATED_DATE/'
}
+# remove redundant parts of notmuch-git internal paths
+notmuch_git_sanitize () {
+ sed 's,tags/\([0-9a-f]\{2\}/\)\{4\},,'
+}
notmuch_uuid_sanitize () {
sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g'
}
--
2.35.2
^ permalink raw reply related [flat|nested] 2+ messages in thread