* Re: On disk tag storage format
2013-10-05 1:28 ` Ethan Glasser-Camp
@ 2013-10-07 4:49 ` Ethan Glasser-Camp
0 siblings, 0 replies; 6+ messages in thread
From: Ethan Glasser-Camp @ 2013-10-07 4:49 UTC (permalink / raw)
To: David Bremner, notmuch mailing list
[-- Attachment #1: Type: text/plain, Size: 628 bytes --]
Ethan Glasser-Camp <ethan.glasser.camp@gmail.com> writes:
> I've modified the script so that it would run by mangling filenames,
> which is irreversible (the original tried to encode/decode filenames
> reversibly). Then I got a little carried away, adding --verbose and
> --dry-run options as well as removing a couple trailing
> semicolons. Here's my version, in case it should interest anyone else.
Hi guys,
There was a bug in the previous version I sent. It didn't handle
unlinking tags correctly. Also, I spotted a bug in syncing to untagged
messages. Maybe I should stop using emails as version control.
---- 8< ----
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: slightly more tested this time --]
[-- Type: text/x-python, Size: 6574 bytes --]
# Copyright 2013, David Bremner <david@tethera.net>
# Licensed under the same terms as notmuch.
import notmuch
import re
import os, errno
import sys
from collections import defaultdict
import argparse
import hashlib
# skip automatic and maildir tags
# (fixed typo: notmuch's automatic tag is "attachment", not "attachement")
skiptags = re.compile(r"^(attachment|signed|encrypted|draft|flagged|passed|replied|unread)$")
# some random person on stack overflow suggests:
def mkdir_p(path):
    """Create *path* and any missing parents; a pre-existing directory is OK."""
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        # Re-raise unless the failure is just "directory already exists".
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
# Global toggled from the --verbose command-line flag below.
VERBOSE = False

def log(msg):
    """Print *msg*, but only when verbose mode is enabled."""
    if not VERBOSE:
        return
    print(msg)
# Characters allowed verbatim in on-disk names; anything else is
# escaped as a lowercase %xx sequence.
CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'
encode_re = '([^{0}])'.format(CHARSET)
decode_re = '[%]([0-7][0-9A-Fa-f])'

def encode_one_char(match):
    # One disallowed character -> its %xx escape.
    return('%{:02x}'.format(ord(match.group(1))))

def encode_for_fs(text):
    """Percent-escape the characters of *text* that are unsafe in filenames."""
    # parameter renamed from 'str' to avoid shadowing the builtin
    return re.sub(encode_re, encode_one_char, text, 0)

def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    Ids short enough to fit in a maildir filename (leaving room for the
    ':2,...' flags suffix) are returned escaped but otherwise intact.
    Longer ids are truncated in the middle and disambiguated with a
    short sha256 digest so distinct long ids stay distinct.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8 # :2,S...??
    encoded = encode_for_fs(msg_id)
    if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
        return encoded
    SHA_LENGTH = 8
    TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
    PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
    prefix = encoded[:PREFIX_LENGTH]
    suffix = encoded[-SUFFIX_LENGTH:]
    sha = hashlib.sha256()
    # Hash bytes, not text: sha.update(encoded) breaks on Python 3
    # strings. encoded is pure ASCII after escaping, so this is safe
    # on both Python 2 and 3.
    sha.update(encoded.encode('ascii'))
    return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]

def decode_one_char(match):
    # A '%xx' escape -> the original character.
    return chr(int(match.group(1), 16))

def decode_from_fs(text):
    """Inverse of encode_for_fs: expand %xx escapes back to characters."""
    return re.sub(decode_re, decode_one_char, text, 0)
def mk_tag_dir(tagdir):
    """Create the maildir skeleton (cur/new/tmp) under *tagdir*."""
    for sub in ('cur', 'new', 'tmp'):
        mkdir_p(os.path.join(tagdir, sub))
# Matches the maildir "info" suffix, e.g. ":2,RS", at the end of a filename.
flagpart = '(:2,[^:]*)'
flagre = re.compile(flagpart + '$')

def path_for_msg(dir, msg):
    """Return the link path for *msg* inside tag directory *dir*.

    The name is the mangled message id plus whatever maildir flags the
    message's current filename carries, placed in the 'cur' subfolder.
    """
    filename = msg.get_filename()
    flagsmatch = flagre.search(filename)
    if flagsmatch is None:  # was '== None'
        flags = ''
    else:
        flags = flagsmatch.group(1)
    return os.path.join(dir, 'cur', mangle_message_id(msg.get_message_id()) + flags)
def unlink_message(dir, msg):
    """Remove any link for *msg* from the 'cur' folder of tag dir *dir*.

    Matches the mangled id with an optional maildir flags suffix.
    Honors --dry-run: then it only logs what would be removed.
    """
    curdir = os.path.join(dir, 'cur')
    mangled_id = mangle_message_id(msg.get_message_id())
    filepattern = '^' + re.escape(mangled_id) + flagpart + '?$'
    filere = re.compile(filepattern)
    # loop variable renamed from 'file' to avoid shadowing the builtin
    for entry in os.listdir(curdir):
        if filere.match(entry):
            log("Unlinking {}".format(os.path.join(curdir, entry)))
            if not opts.dry_run:
                os.unlink(os.path.join(curdir, entry))
def dir_for_tag(tag):
    """Map a (possibly unsafe) tag name to its directory under tagroot."""
    return os.path.join(tagroot, encode_for_fs(tag))
# mangled message id -> set of (decoded) tags found on disk
disk_tags = defaultdict(set)
# every mangled message id seen anywhere in the link farm
disk_ids = set()

def read_tags_from_disk(rootdir):
    """Populate disk_ids/disk_tags from the link farm below *rootdir*.

    Expected layout is rootdir/<encoded-tag>/cur/<mangled-id>[:2,flags].
    """
    for root, subFolders, files in os.walk(rootdir):
        for filename in files:
            # Strip the maildir flags suffix, e.g. ":2,S".
            mangled_id = filename.split(':')[0]
            # The tag is the directory above cur/new/tmp. Use os.path
            # instead of splitting on '/' so this stays portable.
            tag = os.path.basename(os.path.dirname(root))
            disk_ids.add(mangled_id)
            disk_tags[mangled_id].add(decode_from_fs(tag))
# Main program
parser = argparse.ArgumentParser(description='Sync notmuch tag database to/from link farm')
parser.add_argument('-l','--link-style',choices=['hard','symbolic', 'adaptive'],
                    default='adaptive')
parser.add_argument('-d','--destination',choices=['disk','notmuch'], default='disk')
# size threshold (bytes) above which 'adaptive' link style uses a symlink
parser.add_argument('-t','--threshold', default=50000L, type=int)
parser.add_argument('-n','--dry-run', default=False, action='store_true')
parser.add_argument('-v','--verbose', default=False, action='store_true')
parser.add_argument('tagroot')
opts=parser.parse_args()
VERBOSE = opts.verbose
tagroot=opts.tagroot
# destination=notmuch means the on-disk link farm is authoritative and
# the notmuch database is updated to match; otherwise the reverse.
sync_from_links = (opts.destination == 'notmuch')
read_tags_from_disk(tagroot)
if sync_from_links:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
else:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)
dbtags = filter (lambda tag: not skiptags.match(tag), db.get_all_tags())
if sync_from_links:
    # have to iterate over even untagged messages
    querystr = '*'
else:
    querystr = ' OR '.join(map (lambda tag: 'tag:'+tag, dbtags))
q_new = notmuch.Query(db, querystr)
q_new.set_sort(notmuch.Query.SORT.UNSORTED)
for msg in q_new.search_messages():
    # silently ignore empty tags
    db_tags = set(filter (lambda tag: tag != '' and not skiptags.match(tag),
                          msg.get_tags()))
    message_id = msg.get_message_id()
    mangled_id = mangle_message_id(message_id)
    # anything still in disk_ids after this loop has no matching message
    disk_ids.discard(mangled_id)
    # set differences drive the sync in both directions
    missing_on_disk = db_tags.difference(disk_tags[mangled_id])
    missing_in_db = disk_tags[mangled_id].difference(db_tags)
    if sync_from_links:
        msg.freeze()
    filename = msg.get_filename()
    # decide hard vs symbolic link once per message (only used when
    # writing to disk)
    if len(missing_on_disk) > 0:
        if opts.link_style == 'adaptive':
            statinfo = os.stat (filename)
            symlink = (statinfo.st_size > opts.threshold)
        else:
            symlink = opts.link_style == 'symbolic'
    # tags present in the db but not on disk: either remove them from
    # the db (disk authoritative) or create the missing links
    for tag in missing_on_disk:
        if sync_from_links:
            log("Removing tag {} from {}".format(tag, message_id))
            if not opts.dry_run:
                msg.remove_tag(tag,sync_maildir_flags=False)
        else:
            tagdir = dir_for_tag (tag)
            if not opts.dry_run:
                mk_tag_dir (tagdir)
            newlink = path_for_msg (tagdir, msg)
            log("Linking {} to {}".format(filename, newlink))
            if not opts.dry_run:
                if symlink:
                    os.symlink(filename, newlink)
                else:
                    os.link(filename, newlink)
    # tags present on disk but not in the db: either add them to the
    # db or remove the stale links
    for tag in missing_in_db:
        if sync_from_links:
            log("Adding {} to message {}".format(tag, message_id))
            if not opts.dry_run:
                msg.add_tag(tag,sync_maildir_flags=False)
        else:
            tagdir = dir_for_tag (tag)
            unlink_message(tagdir,msg)
    if sync_from_links:
        msg.thaw()
# everything remaining in disk_ids is a deleted message
# unless we are syncing back to the database, in which case
# it just might not currently have any non maildir tags.
if not sync_from_links:
    for root, subFolders, files in os.walk(tagroot):
        for filename in files:
            mangled_id = filename.split(':')[0]
            if mangled_id in disk_ids:
                os.unlink(os.path.join(root, filename))
db.close()
# currently empty directories are not pruned.
[-- Attachment #3: Type: text/plain, Size: 1607 bytes --]
---- 8< ----
Of course, the next step is to sync using this mechanism. Rsync doesn't
really have a concept of history, which basically makes it unusable for
this purpose [1]. Unison doesn't really understand renames, so it gets
confused when you mark a message as read (which might move it from new
to cur, and definitely changes its tags). Bremner suggested
syncmaildir. Syncmaildir doesn't understand links at all. Bremner
suggested that we could use some parts of syncmaildir to implement the
tag syncing we need.
I didn't have anything else going on this weekend so I tried to
prototype the approach. It turns out to be possible to leverage some
parts of syncmaildir. I translated a bunch of smd-client into a new
program, tagsync-client, that links to messages in an existing notmuch
DB. It seems like it's possible to use it in place of the existing
smd-client by putting lines like this in your config:
SMDCLIENT=~/src/tagsync.git/tagsync-client.py
REMOTESMDCLIENT=~/src/tagsync.git/tagsync-client.py
The sequence of commands I ran:
- linksync.py to dump tags to ~/Mail/.notmuch/exported-tags
- smd-pull mail to sync ~/Mail but excluding .notmuch
- notmuch new
- smd-pull tagsync (using the above client) to sync ~/Mail/.notmuch/exported-tags
- linksync.py to pull tags from ~/Mail/.notmuch/exported-tags
syncmaildir doesn't cope well with drafts, so it might choke on that,
and it doesn't like symlinks (it thinks they're always to directories),
so be sure to run linksync with -l hard.
Here's the script. It's a work in progress; I have only tested it once in one direction.
---- 8< ----
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: client script --]
[-- Type: text/x-python, Size: 19931 bytes --]
#! /usr/bin/env python
import sys
from sys import stdout, stdin, stderr
import stat
import urllib
import hashlib
import re
import os.path
import argparse
import subprocess
import traceback
import notmuch
import time
PROTOCOL_VERSION = "1.1"
# Not reproducing the autoconf logic
XDELTA = 'xdelta'
MDDIFF = 'mddiff'
VERBOSE = False

def log(msg):
    """Write an INFO line to stderr, but only in verbose mode."""
    if not VERBOSE:
        return
    stderr.write("INFO: " + msg + "\n")
def __error(msg):
    # All fatal paths funnel through here so main() can catch one
    # exception type.
    raise ValueError(msg)

def log_tags_and_fail(msg, *args):
    """Emit a machine-readable TAGS line (see log_tags), then abort."""
    log_tags(*args)
    __error(msg)

def log_internal_error_and_fail(msg, *args):
    """Report an internal error via a TAGS line, then abort."""
    log_internal_error_tags(msg, *args)
    __error(msg)

def log_error(msg):
    """Write an ERROR line to stderr; returns the write() result."""
    return stderr.write("ERROR: {}\n".format(msg))

def log_tag(tag):
    """Write a raw TAGS line to stderr (smd machine-readable output)."""
    return stderr.write("TAGS: {}\n".format(tag))

def log_progress(msg):
    # Progress reporting is a no-op in this prototype.
    pass

def log_tags(context='unknown', cause='unknown', human=False, *args):
    """Emit the smd 'error::' TAGS line.

    *human* selects whether intervention is 'necessary' or 'avoidable';
    any extra *args* become the suggested-actions list.
    """
    if human:
        human = "necessary"
    else:
        human = "avoidable"
    # (removed an unused 'suggestions = {}' local)
    suggestions_string = ""
    if len(args):
        suggestions_string = ' suggested-actions({})'.format(' '.join(args))
    return log_tag("error::context({}) probable-cause({}) human-intervention({})".format(
        context, cause, human) + suggestions_string)
def mkdir_p(filename):
    """Maildir-aware mkdir.
    Creates a directory and all parent directories.
    Moreover, if the last component is 'tmp', 'cur' or 'new', the
    others are created too."""
    # The Lua function throws away the last path component if it
    # doesn't end in /. This allows you to just call mkdir_p on any
    # file and a directory for it to live will be created.
    if not filename.endswith('/'):
        filename, _ = os.path.split(filename)
    if not filename.startswith('/'):
        # This path is relative to HOME, and needs to be translated
        # too.
        filename = translate(filename)
        filename = os.path.expanduser('~/'+filename)
    dirname, basename = os.path.split(filename)
    try:
        os.makedirs(filename)
    except OSError:
        # NOTE(review): this also swallows permission errors, not just
        # "File exists" -- confirm that is acceptable here.
        pass # probably "File exists"
    MAILDIR_SUBDIRS = ['tmp', 'cur', 'new']
    if basename in MAILDIR_SUBDIRS:
        # Creating one maildir subfolder implies the sibling two.
        for subdir in MAILDIR_SUBDIRS:
            to_create = os.path.join(dirname, subdir)
            if not os.path.exists(to_create):
                os.mkdir(to_create)
class FakeSubprocess(object):
    """File-like wrapper around a lazily created filter process.

    The inferior process and its fifo are only created by
    init_function on the first write(); reading before any write is a
    fatal error. The fifo is unlinked on the first read that follows
    a write.
    """
    def __init__(self, init_function):
        # init_function(filter_dict) must fill in 'inf' (the
        # inferior's stdout), 'outf' (our writable end) and 'pipe'
        # (the fifo path).
        self.init_function = init_function
        self.input = None
        self.output = None
        self.pipe_name = None
        self.removed = None
        self.did_write = None
        self.filter = {}
    def readline(self):
        if not self.input:
            log_internal_error_and_fail("read called before write",
                                        "make_slave_filter_process")
        if not self.removed and self.did_write:
            # First read after a write: the fifo has served its
            # purpose, so remove it from the filesystem.
            self.removed = True
            rc = self.input.readline()
            os.unlink(self.pipe_name)
            return rc
        else:
            return self.input.readline()
    def write(self, *args):
        if not self.output:
            # Lazily start the inferior process and open the fifo.
            self.init_function(self.filter)
            self.input = self.filter['inf']
            self.output = self.filter['outf']
            self.pipe_name = self.filter['pipe']
        self.did_write = True
        self.output.write(*args)
    def flush(self):
        self.output.flush()
    def lines(self):
        # Drain whatever remains of the inferior's stdout.
        return self.input.readlines()
def make_slave_filter_process(cmd, seed="no seed"):
    """Return a FakeSubprocess that talks to *cmd* through a fresh fifo.

    *cmd* is a callable taking the fifo path and returning a
    Popen-like object whose stdout we read. The fifo lives under
    ~/.smd/fifo/ (or /tmp when HOME is unset); its name is derived
    from *seed* and creation is retried a few times against
    collisions. Nothing is created until the first write() on the
    returned object.
    """
    # parameter renamed from 'filter' to avoid shadowing the builtin
    def init(state):
        if 'inf' not in state:
            home = os.getenv('HOME')
            user = os.getenv('USER') or 'nobody'
            mangled_name = re.compile('[ %./]').sub('-', seed)
            attempt = 0
            if home:
                base_dir = home + '/.smd/fifo/'
            else:
                base_dir = '/tmp/'
            # mddiff is used for mkdir/mkfifo, mirroring smd proper.
            rc = subprocess.call([MDDIFF, '--mkdir-p', base_dir])
            if rc != 0:
                log_internal_error_and_fail('unable to create directory',
                                            'make_slave_filter_process')
            while True:
                pipe_name = ''.join([base_dir, 'smd-', user, str(int(time.time())),
                                     mangled_name, str(attempt)])
                attempt += 1
                rc = subprocess.call([MDDIFF, '--mkfifo', pipe_name])
                if rc == 0 or attempt > 10:
                    break
            if rc != 0:
                log_internal_error_and_fail('unable to create fifo',
                                            "make_slave_filter_process")
            inferior = cmd(pipe_name)
            state['inf'] = inferior.stdout
            # open() instead of the Python-2-only file() builtin
            state['outf'] = open(pipe_name, 'w')
            state['pipe'] = pipe_name
    return FakeSubprocess(init)
# The installed mailbox-name translator; None means identity.
_translator = None

def set_translator(p):
    """Install command *p* as the mailbox-name translator.

    'cat' is special-cased to an in-process identity function;
    anything else is run as a long-lived filter process that maps one
    input line to one output line.
    """
    global _translator
    translator_filter = make_slave_filter_process(
        # open() instead of the Python-2-only file() builtin
        lambda pipe: subprocess.Popen(p, stdin=open(pipe), stdout=subprocess.PIPE),
        "translate")
    if p == 'cat':
        _translator = lambda x: x
    else:
        def translator_fn(x):
            translator_filter.write(x + '\n')
            translator_filter.flush()
            line = translator_filter.readline()
            if not line or line.strip() == 'ERROR':
                log_error("Translator {} on input {} gave an error".format(
                    p, x))
                # bug fix: FakeSubprocess has no readlines();
                # its drain method is called lines().
                for l in translator_filter.lines():
                    log_error(l)
                log_tags_and_fail("Unable to translate mailbox",
                                  'translate', 'bad-translator', True)
            if '..' in line:
                # Refuse path traversal from a misbehaving translator.
                log_error("Translator {} on input {} returned a path containing ..".format(
                    p, x))
                log_tags_and_fail('Translator returned a path containing ..',
                                  'translate', 'bad-translator', True)
            return line
        _translator = translator_fn

def translate(x):
    """Apply the configured translator to *x* (identity when unset)."""
    if _translator:
        return _translator(x)
    return x
# Long-lived mddiff helper used to hash message files; the process and
# fifo are created lazily on the first write (see FakeSubprocess).
mddiff_sha_handler = make_slave_filter_process(
    lambda pipe: subprocess.Popen([MDDIFF, pipe], stdout=subprocess.PIPE),
    "sha_file")

def sha_file(name):
    """Return (header_sha, body_sha) for message file *name* via mddiff."""
    mddiff_sha_handler.write(name+'\n')
    mddiff_sha_handler.flush()
    data = mddiff_sha_handler.readline()
    if data.startswith('ERROR'):
        # Typically the message changed or vanished mid-sync.
        log_tags_and_fail("Failed to sha1 message: " + (name or "nil"),
                          'sha_file', 'modify-while-update', False, 'retry')
    hsha, bsha = data.split()
    # NOTE(review): if mddiff returns anything other than exactly two
    # fields, the unpack above raises ValueError before this check can
    # fire -- confirm whether that path needs its own handling.
    if not hsha or not bsha:
        log_internal_error_and_fail('mddiff incorrect behavior', 'mddiff')
    return hsha, bsha
def exists_and_sha(name):
    """Return (exists, header_sha, body_sha); all False when missing."""
    if not os.path.exists(name):
        return False, False, False
    hsha, bsha = sha_file(name)
    return True, hsha, bsha
def touch(f):
    """Ensure file *f* exists, creating it empty when missing.

    Unlike touch(1) it does not update the mtime of an existing file;
    it only checks readability and creates the file if needed.
    """
    try:
        # with-statements close the handles the original leaked
        with open(f, 'r'):
            pass
    except IOError:
        try:
            with open(f, 'w'):
                pass
        except IOError:
            log_error('Unable to touch ' + quote(f))
            log_tags("touch", "bad-permissions", True,
                     "display-permissions(" + quote(f) + ")")
            # was: error(...) -- an undefined name (Lua leftover);
            # fail through the module's helper like other fatal paths.
            __error("Unable to touch a file")
def quote(s):
    """Return *s* quoted for display inside error messages."""
    return "{!r}".format(s)
def assert_exists(name):
    """Abort with AssertionError unless *name* exists on disk."""
    # was: os.exists, which does not exist -- os.path.exists is meant.
    assert os.path.exists(name), "Not found: "+repr(name)

def url_quote(txt):
    """Percent-encode *txt* completely (no characters treated as safe)."""
    return urllib.quote(txt, safe='')

def url_decode(s):
    """Inverse of url_quote: decode %xx escapes."""
    return urllib.unquote(s)

def log_internal_error_tags(msg, ctx):
    # Internal errors always require human intervention.
    log_tags('internal-error', ctx, True)
# Blob of "run gnome-open" junk not copied
def receive(inf, outfile):
    """Read one 'chunk <len>' framed payload from *inf* into *outfile*.

    Returns the number of bytes received. Aborts (ValueError) when the
    peer sends ABORT, the connection dies, or *outfile* is unwritable.
    """
    try:
        # open() instead of the Python-2-only file() builtin
        outf = open(outfile, 'w')
    except IOError:
        log_error("Unable to open " + outfile + " for writing.")
        log_error('It may be caused by bad directory permissions, '+
                  'please check.')
        log_tags("receive", "non-writeable-file", True,
                 "display-permissions(" + quote(outfile) +")")
        # was: error(...) -- an undefined name (Lua leftover).
        __error("Unable to write incoming data")
    line = inf.readline()
    if not line or line.strip() == "ABORT":
        log_error("Data transmission failed.")
        log_error("This problem is transient, please retry.")
        log_tags_and_fail('server sent ABORT or connection died',
                          "receive", "network", False, "retry")
    # In the Lua version, this is called "len", but that's a builtin
    # in Python
    chunk_len = int(re.compile(r'^chunk (\d+)').match(line).group(1))
    total = chunk_len
    # NOTE(review): if *inf* hits EOF early, read() returns '' and this
    # loop would spin forever -- confirm the transport guarantees the
    # full chunk.
    while chunk_len:
        next_chunk = 16384
        if chunk_len < next_chunk:
            next_chunk = chunk_len
        data = inf.read(next_chunk)
        chunk_len -= len(data)
        outf.write(data)
    # Probably not strictly speaking necessary in Python
    outf.close()
    return total
def handshake(dbfile):
    """Negotiate protocol version and db-file hash with the peer on stdio.

    Any mismatch or network problem aborts via log_tags_and_fail.
    """
    stdout.write("protocol {}\n".format(PROTOCOL_VERSION))
    touch(dbfile)
    sha_output = subprocess.check_output([MDDIFF, '--sha1sum', dbfile])
    db_sha = sha_output.split()[0]
    # NOTE(review): err_msg is computed but never used below -- confirm
    # whether mddiff's message should be reported on the ERROR path.
    err_msg = sha_output[sha_output.index(' ')+1:]
    if db_sha == 'ERROR':
        log_internal_error_and_fail('unreadable db file: '+quote(dbfile), 'handshake')
    stdout.write("dbfile {}\n".format(db_sha))
    stdout.flush()
    line = stdin.readline()
    if not line:
        log_error("Network error.")
        log_error("Unable to get any data from the other endpoint.")
        log_error("This problem may be transient, please retry.")
        log_error("Hint: did you correctly setup the SERVERNAME variable")
        log_error("on your client? Did you add an entry for it in your ssh")
        log_error("configuration file?")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")
    # Peer must speak exactly our protocol version.
    protocol = re.compile('^protocol (.+)$').match(line)
    if not protocol or protocol.group(1) != PROTOCOL_VERSION:
        log_error('Wrong protocol version.')
        log_error('The same version of syncmaildir must be used on '+
                  'both endpoints')
        log_tags_and_fail('Protocol version mismatch', "handshake", "protocol-mismatch", True)
    line = stdin.readline()
    if not line:
        log_error("The client disconnected during handshake")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")
    # Both sides must agree on the state-database hash before applying
    # any diff.
    sha = re.compile(r'^dbfile (\S+)$').match(line)
    if not sha or sha.group(1) != db_sha:
        log_error('Local dbfile and remote db file differ.')
        log_error('Remove both files and push/pull again.')
        log_tags_and_fail('Database mismatch', "handshake", "db-mismatch", True, "run(rm "+
                          quote(dbfile)+")")
def dbfile_name(endpoint, mailboxes):
    """Compute the per-(endpoint, mailboxes) state-db path under ~/.smd."""
    endpoint = endpoint.rstrip('/')
    mailboxes = mailboxes.rstrip('/')
    # mddiff --mkdir-p is used instead of os.makedirs, matching smd proper.
    subprocess.check_call([MDDIFF, '--mkdir-p', os.path.expanduser('~/.smd/')])
    flat_endpoint = endpoint.replace('/', '_')
    flat_boxes = mailboxes.replace('/', '_').replace('%', '_')
    return os.path.expanduser('~/.smd/{}__{}.db.txt'.format(flat_endpoint, flat_boxes))
def receive_delta(inf):
    """Read diff commands from *inf* until an END line; return them as a list.

    Aborts (ValueError) if the stream ends before the END terminator.
    """
    cmds = []
    while True:
        line = inf.readline()
        if line and line.strip() != "END":
            cmds.append(line)
        if not line or line.strip() == "END":
            break
    if line.strip() != "END":
        # EOF before the terminator: the transfer was cut short.
        log_error('Unable to receive a complete diff')
        log_tags("receive-delta", "network", False, "retry")
        # was: error(...) -- an undefined name (Lua leftover).
        __error("network error while receiving delta")
    return cmds
def homefy(filename):
    """Resolve *filename* relative to the user's home directory."""
    return os.path.expanduser("~/" + filename)
def execute_add(name, hsha, bsha):
    """Materialize message *name* by hard-linking an identical message
    already known to notmuch; content is verified via (hsha, bsha).

    Returns True on success (or if an identical file already exists),
    False when a different file occupies the destination.
    """
    dir, basename = os.path.split(name)
    # The real smd creates symlinks from workarea to the target
    # directory, I dunno why.
    dest = homefy(name)
    ex, hsha_1, bsha_1 = exists_and_sha(dest)
    if ex:
        if hsha_1 != hsha or bsha_1 != bsha:
            # A different file already occupies the destination name.
            log_error("Failed to add {} since a file with the same name".format(
                dest))
            log_error('exists but its content is different.')
            log_error("Current hash {}/{}, requested hash {}/{}".format(
                hsha_1, bsha_1, hsha, bsha))
            log_error('To fix this problem you should rename '+dest)
            log_error('Executing `cd; mv -n '+quote(name)+' '+
                      'FIXME: tmp_for' +'` should work.')
            log_tags("mail-addition", "concurrent-mailbox-edit", True,
                     )
            #mk_act("mv", name))
            return False
        return True # already there
    # Strip the maildir flags suffix before looking the message up.
    if ':2,' in basename:
        basename = basename[:basename.index(':2,')]
    filenames = original_message_filenames(basename)
    for filename in filenames:
        orig_exists, hsha_orig, bsha_orig = exists_and_sha(filename)
        assert orig_exists
        # Either hash matching is accepted as "same message".
        if hsha_orig == hsha or bsha_orig == bsha:
            os.link(filename, dest)
            return True
    # NOTE(review): if filenames is empty, 'filename' below is unbound
    # and this raises NameError instead of the intended report.
    log_error("Something seriously wrong here: we tried to link {}".format(
        filename))
    log_error("to {} but the hashes were wrong. We wanted {}/{}".format(
        dest, hsha, bsha))
    log_error("but we didn't see that in {}".format(filenames))
    log_tags_and_fail('Mail corpus wrong')
    # FIXME: How do we decide whether to use symlinks or not?
    # Seems like syncmaildir can't cope with symlinks, so let's just
    # always use hard links
    return False
def execute_delete(name, hsha, bsha):
    """Unlink message *name*, refusing unless its hashes match exactly."""
    name = homefy(name)
    ex, hsha_1, bsha_1 = exists_and_sha(name)
    # Never delete a file whose content differs from what the server
    # believes is there.
    assert ex
    assert hsha_1 == hsha
    assert bsha_1 == bsha
    os.unlink(name)
    return True
def execute_copy(name_src, hsha, bsha, name_tgt):
    """Duplicate message *name_src* as *name_tgt*, preserving link type.

    Symlinks are recreated as symlinks to the same target; regular
    files are hard-linked. Hashes of the source must match.
    """
    name_src = homefy(name_src)
    name_tgt = homefy(name_tgt)
    ex_src, hsha_src, bsha_src = exists_and_sha(name_src)
    ex_tgt, hsha_tgt, bsha_tgt = exists_and_sha(name_tgt)
    # Not reproducing all logic
    assert ex_src
    assert not ex_tgt
    assert hsha == hsha_src
    assert bsha == bsha_src
    # was os.stat, which follows symlinks so S_ISLNK could never be
    # true; os.lstat inspects the link itself.
    if stat.S_ISLNK(os.lstat(name_src).st_mode):
        link_tgt = os.readlink(name_src)
        os.symlink(link_tgt, name_tgt)
    else:
        os.link(name_src, name_tgt)
    return True
def execute_error(msg):
    """Report an ERROR command sent by mddiff; always returns False."""
    log_error('mddiff failed: '+msg)
    if msg.startswith("Unable to open directory"):
        # was lowercase Lua-style 'false'/'true', which are NameErrors
        # in Python
        log_tags("mddiff", "directory-disappeared", False)
    else:
        log_tags("mddiff", "unknown", True)
    # return (trace(false))
    return False
def execute(cmd):
    """The main switch, dispatching actions.

    Parses one protocol line (ADD/DELETE/COPY/ERROR/...) and invokes
    the matching execute_* handler; returns its boolean result.
    """
    opcode = cmd.split()[0]
    if opcode == "ADD":
        name, hsha, bsha = re.compile(r'^ADD (\S+) (\S+) (\S+)$').match(cmd).groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_add(name, hsha, bsha)
    elif opcode == "DELETE":
        name, hsha, bsha = re.compile(r'^DELETE (\S+) (\S+) (\S+)$').match(cmd).groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_delete(name, hsha, bsha)
    elif opcode == "COPY":
        name_src, hsha, bsha, name_tgt = re.compile(
            r'COPY (\S+) (\S+) (\S+) TO (\S+)$').match(cmd).groups()
        name_src = url_decode(name_src)
        name_tgt = url_decode(name_tgt)
        mkdir_p(name_src)
        mkdir_p(name_tgt)
        return execute_copy(name_src, hsha, bsha, name_tgt)
    elif opcode in ['REPLACEHEADER', 'COPYBODY', 'REPLACE']:
        # These need body diffs, which this prototype never produces.
        # (fixed: the helper requires a context argument)
        log_internal_error_and_fail(opcode + ' was called: ' + cmd, 'execute')
        return False
    elif opcode == "ERROR":
        msg = cmd[cmd.index(' ')+1:]
        return execute_error(msg)
    else:
        # was: error(...) -- an undefined name (Lua leftover); fail
        # through the module's helper like the other fatal paths.
        __error("Unknown opcode " + opcode)
        return False
def main():
    """Drive one tagsync-client session over stdin/stdout (smd protocol)."""
    parser = argparse.ArgumentParser(description="")
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    parser.add_argument('-d', '--dry-run', action='store_true', default=False)
    parser.add_argument('-t', '--translator', type=str, default='cat')
    parser.add_argument('endpoint')
    parser.add_argument('mailboxes')
    opts = parser.parse_args()
    # bug fix: -v was parsed but the global flag was never set.
    global VERBOSE
    VERBOSE = opts.verbose
    # NOTE(review): opts.dry_run is accepted but not honored anywhere yet.
    set_translator(opts.translator)
    read_message_ids()
    dbfile = dbfile_name(opts.endpoint, opts.mailboxes)
    xdelta = dbfile + '.xdelta'
    newdb = dbfile + '.new'
    # startswith avoids an IndexError on an empty mailboxes argument
    if opts.mailboxes.startswith('/'):
        log_error("Absolute paths are not supported: " + opts.mailboxes)
        log_tags_and_fail("Absolute path detected", "main", "mailbox-has--absolute-path", True)
    handshake(dbfile)
    commands = receive_delta(stdin)
    for cmd in commands:
        try:
            rc = execute(cmd)
        # Just wrap the whole thing in try-except to abort "cleanly"
        except Exception as e:
            log_error("Got an exception when processing {}: {}".format(cmd.strip(), str(e)))
            log_error(traceback.format_exc())
            rc = False
        if not rc:
            stdout.write('ABORT\n')
            stdout.flush()
            sys.exit(3)
    # if len(get_full_email_queue) > queue_max_len:
    # process_pending_queue()
    # some commands may still be in the queue, we fire them now
    # process_pending_queue()
    # we commit and update the dbfile
    stdout.write('COMMIT\n')
    stdout.flush()
    receive(stdin, xdelta)
    rc = subprocess.call([XDELTA, 'patch', xdelta, dbfile, newdb])
    # NOTE(review): subprocess.call returns the exit status (0-255),
    # never 256 -- the 256 check looks like a Lua os.execute leftover;
    # confirm which xdelta statuses mean success.
    if rc != 0 and rc != 256:
        log_error('Unable to apply delta to dbfile.')
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(4)
    try:
        os.rename(newdb, dbfile)
    except OSError:
        log_error('Unable to rename ' + newdb + ' to ' + dbfile)
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(5)
    os.unlink(xdelta)
    stdout.write('DONE\n')
    stdout.flush()
    #log_tag('stats::new-mails(' + statistics.added +
    #'), del-mails(' + statistics.removed + ')')
# Characters allowed verbatim in on-disk names; anything else is
# escaped as a lowercase %xx sequence. (Duplicated from linksync.py.)
CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'
encode_re = '([^{0}])'.format(CHARSET)

def encode_one_char(match):
    # One disallowed character -> its %xx escape.
    return('%{:02x}'.format(ord(match.group(1))))

def encode_for_fs(text):
    """Percent-escape the characters of *text* that are unsafe in filenames."""
    # parameter renamed from 'str' to avoid shadowing the builtin
    return re.sub(encode_re, encode_one_char, text, 0)

def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    Short ids are returned escaped but intact; longer ids are truncated
    in the middle and disambiguated with a short sha256 digest.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8 # :2,S...??
    encoded = encode_for_fs(msg_id)
    if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
        return encoded
    SHA_LENGTH = 8
    TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
    PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
    prefix = encoded[:PREFIX_LENGTH]
    suffix = encoded[-SUFFIX_LENGTH:]
    sha = hashlib.sha256()
    # Hash bytes, not text: sha.update(encoded) breaks on Python 3
    # strings. encoded is pure ASCII after escaping, so this is safe
    # on both Python 2 and 3.
    sha.update(encoded.encode('ascii'))
    return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]
# mangled maildir basename -> list of paths notmuch knows for that message
MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES = {}
# Module-level read-only handle opened at import time.
DB = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)

def read_message_ids():
    """Fill the mangled-id -> filenames map from the entire notmuch DB."""
    # We can't base this on tags at all because tags aren't applied yet
    querystr = '*'
    q_new = notmuch.Query(DB, querystr)
    q_new.set_sort(notmuch.Query.SORT.UNSORTED)
    for msg in q_new.search_messages():
        mangled_id = mangle_message_id(msg.get_message_id())
        fiter = msg.get_filenames()
        # list(fiter) gives me a NotInitializedException????
        filenames = []
        while True:
            try:
                filename = next(fiter)
                filenames.append(filename)
            except StopIteration:
                break
        MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_id] = filenames

def original_message_filenames(mangled_filename):
    """Return the notmuch paths recorded for *mangled_filename*.

    An unknown id is logged, after which the dict lookup below still
    raises KeyError to the caller.
    """
    if mangled_filename not in MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES:
        log_error("{} not in notmuch. Trying to tag nonexistant message?".format(
            mangled_filename))
    return MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_filename]
if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Last-resort handler: report the failure and exit nonzero so
        # the smd driver notices the client died.
        log_error(str(e))
        log_error(traceback.format_exc())
        sys.exit(6)
^ permalink raw reply [flat|nested] 6+ messages in thread