unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/5] lei q + lcat: support --format=text output
Date: Tue, 27 Apr 2021 11:07:53 +0000	[thread overview]
Message-ID: <20210427110753.24609-6-e@80x24.org> (raw)
In-Reply-To: <20210427110753.24609-1-e@80x24.org>

This is mainly for "lei lcat" where it's the default,
but I find it useful anyways compared to the JSON view.

Colors are loaded from ~/.config/lei/config, and fall back
to using diff colors from a normal git config
(e.g. ~/.gitconfig).
---
 MANIFEST                       |   1 +
 lib/PublicInbox/Hval.pm        |   2 +-
 lib/PublicInbox/LeiLcat.pm     |   2 +-
 lib/PublicInbox/LeiToMail.pm   |  63 ++++++++-
 lib/PublicInbox/LeiViewText.pm | 237 +++++++++++++++++++++++++++++++++
 lib/PublicInbox/ViewDiff.pm    |   4 +-
 6 files changed, 301 insertions(+), 8 deletions(-)
 create mode 100644 lib/PublicInbox/LeiViewText.pm

diff --git a/MANIFEST b/MANIFEST
index d3b46f8b..5933ddf4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -218,6 +218,7 @@ lib/PublicInbox/LeiSucks.pm
 lib/PublicInbox/LeiTag.pm
 lib/PublicInbox/LeiToMail.pm
 lib/PublicInbox/LeiUp.pm
+lib/PublicInbox/LeiViewText.pm
 lib/PublicInbox/LeiXSearch.pm
 lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index eab4738e..00b3c8b4 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -34,7 +34,7 @@ my %escape_sequence = (
 	"\x7f" => '\\x7f', # DEL
 );
 
-my %xhtml_map = (
+our %xhtml_map = (
 	'"' => '&#34;',
 	'&' => '&#38;',
 	"'" => '&#39;',
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index f10452be..87729acf 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -109,7 +109,7 @@ sub lei_lcat {
 	$opt->{sort} //= 'relevance';
 	$mset_opt{relevance} = 1;
 	$lei->{mset_opt} = \%mset_opt;
-	$opt->{'format'} //= 'mboxrd' unless defined($opt->{output});
+	$opt->{'format'} //= 'text' unless defined($opt->{output});
 	if ($lei->{opt}->{stdin}) {
 		return $lei->fail(<<'') if @argv;
 no args allowed on command-line with --stdin
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 8b2f82dc..fa3af710 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -309,6 +309,26 @@ sub _imap_write_cb ($$) {
 	}
 }
 
+sub _text_write_cb ($$) { # returns the per-message writer for --format=text
+	my ($self, $lei) = @_;
+	my $dedupe = $lei->{dedupe};
+	$dedupe->prepare_dedupe if $dedupe;
+	my $lvt = $lei->{lvt}; # PublicInbox::LeiViewText instance
+	my $ovv = $lei->{ovv};
+	$lei->{1} // die "no stdout ($ovv->{dst})"; # redirected earlier
+	$lei->{1}->autoflush(1);
+	binmode $lei->{1}, ':utf8';
+	my $lse = $lei->{lse}; # may be undef
+	sub { # for git_to_mail
+		my ($bref, $smsg, $eml) = @_;
+		$lse->xsmsg_vmd($smsg) if $lse;
+		$eml //= PublicInbox::Eml->new($bref); # copy bref
+		return if $dedupe && $dedupe->is_dup($eml, $smsg); # skip dups
+		my $lk = $ovv->lock_for_scope; # presumably held until return
+		$lei->out(${$lvt->eml_to_text($smsg, $eml)}, "\n"); # deref text
+	}
+}
+
 sub write_cb { # returns a callback for git_to_mail
 	my ($self, $lei) = @_;
 	# _mbox_write_cb, _maildir_write_cb or _imap_write_cb
@@ -329,8 +349,6 @@ sub new {
 		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
 	} elsif (substr($fmt, 0, 4) eq 'mbox') {
 		require PublicInbox::MboxReader;
-		(-d $dst || (-e _ && !-w _)) and die
-			"$dst exists and is not a writable file\n";
 		$self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
 		$self->{base_type} = 'mbox';
 	} elsif ($fmt =~ /\Aimaps?\z/) { # TODO .onion support
@@ -347,9 +365,23 @@ sub new {
 		$dst = $lei->{ovv}->{dst} = $$uri; # canonicalized
 		$lei->{net} = $net;
 		$self->{base_type} = 'imap';
+	} elsif ($fmt eq 'text') {
+		require PublicInbox::LeiViewText;
+		$lei->{lvt} = PublicInbox::LeiViewText->new($lei);
+		$self->{base_type} = 'text';
 	} else {
 		die "bad mail --format=$fmt\n";
 	}
+	if ($self->{base_type} =~ /\A(?:text|mbox)\z/) {
+		(-d $dst || (-e _ && !-w _)) and die
+			"$dst exists and is not a writable file\n";
+	}
+	if ($self->{base_type} eq 'text') {
+		my @err = map {
+			defined($lei->{opt}->{$_}) ? "--$_" : ();
+		} (qw(mua save));
+		die "@err incompatible with $fmt\n" if @err;
+	}
 	$self->{dst} = $dst;
 	$lei->{dedupe} = $lei->{lss} // do {
 		my $dd_cls = 'PublicInbox::'.
@@ -429,6 +461,29 @@ sub _do_augment_imap {
 	}
 }
 
+sub _pre_augment_text { # opens the text destination, stores it in $lei->{1}
+	my ($self, $lei) = @_;
+	my $dst = $lei->{ovv}->{dst};
+	my $out;
+	my $devfd = $lei->path_to_fd($dst) // die "bad $dst";
+	if ($devfd >= 0) { # reuse the handle already stored at $lei->{$devfd}
+		$out = $lei->{$devfd};
+	} else { # normal-looking path
+		if (-p $dst) { # FIFO: always opened with '>'
+			open $out, '>', $dst or die "open($dst): $!";
+		} elsif (-f _ || !-e _) { # regular file, or nothing there yet
+			# text allows augment, HTML/Atom won't
+			my $mode = $lei->{opt}->{augment} ? '>>' : '>';
+			open $out, $mode, $dst or die "open($mode, $dst): $!";
+		} else {
+			die "$dst is not a file or FIFO\n";
+		}
+	}
+	$lei->{ovv}->ovv_out_lk_init if !$lei->{ovv}->{lock_path};
+	$lei->{1} = $out;
+	undef; # explicit undef return value
+}
+
 sub _pre_augment_mbox {
 	my ($self, $lei) = @_;
 	my $dst = $lei->{ovv}->{dst};
@@ -523,8 +578,8 @@ sub pre_augment { # fast (1 disk seek), runs in same process as post_augment
 sub do_augment { # slow, runs in wq worker
 	my ($self, $lei) = @_;
 	# _do_augment_maildir, _do_augment_mbox, or _do_augment_imap
-	my $m = "_do_augment_$self->{base_type}";
-	$self->$m($lei);
+	my $m = $self->can("_do_augment_$self->{base_type}") or return;
+	$m->($self, $lei);
 }
 
 # fast (spawn compressor or mkdir), runs in same process as pre_augment
diff --git a/lib/PublicInbox/LeiViewText.pm b/lib/PublicInbox/LeiViewText.pm
new file mode 100644
index 00000000..6f5fca49
--- /dev/null
+++ b/lib/PublicInbox/LeiViewText.pm
@@ -0,0 +1,237 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# PublicInbox::Eml to (optionally colorized) text converter for terminals
+# the non-HTML counterpart to PublicInbox::View
+package PublicInbox::LeiViewText;
+use strict;
+use v5.10.1;
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::MID qw(references);
+use PublicInbox::View;
+use PublicInbox::Hval;
+use PublicInbox::ViewDiff;
+use PublicInbox::Spawn qw(popen_rd);
+use Term::ANSIColor;
+
+sub _xs { # escapes \x7f and \x00-\x1f in $_[0] in-place via xhtml_map
+	# xhtml_map works since we don't search for HTML ([&<>'"])
+	$_[0] =~ s/([\x7f\x00-\x1f])/$PublicInbox::Hval::xhtml_map{$1}/sge;
+} # NOTE: returns the s/// result (count or false), not the escaped string
+
+my %DEFAULT_COLOR = ( # last resort, after lei and git config lookups
+	# mutt names, loaded from ~/.config/lei/config
+	quoted => 'blue',
+	hdrdefault => 'cyan',
+	status => 'bright_cyan', # smsg stuff
+
+	# git names and defaults, falls back to ~/.gitconfig
+	new => 'green',
+	old => 'red',
+	meta => 'bold',
+	frag => 'cyan',
+	func => undef, # no default: left uncolored unless configured
+	context => undef, # no default: left uncolored unless configured
+);
+
+sub my_colored { # -colored callback: appends $_[2] to obuf, colored by $slot
+	my ($self, $slot) = @_; # $_[2] = buffer
+	my $val = $self->{"color.$slot"} //= # cached per slot once defined
+			$self->{-leicfg}->{"color.$slot"} //
+			$self->{-gitcfg}->{"color.diff.$slot"} //
+			$self->{-gitcfg}->{"diff.color.$slot"} //
+			$DEFAULT_COLOR{$slot};
+	$val = $val->[-1] if ref($val) eq 'ARRAY'; # arrayref: use last entry
+	if (defined $val) {
+		# git doesn't use "_", Term::ANSIColor does
+		$val =~ s/\Abright([^_])/bright_$1/i;
+		${$self->{obuf}} .= Term::ANSIColor::colored($_[2], lc $val);
+	} else { # no color known for this slot: append unchanged
+		${$self->{obuf}} .= $_[2];
+	}
+}
+
+sub uncolored { ${$_[0]->{obuf}} .= $_[2] } # default -colored: append as-is
+
+sub new { # ctor: copies $lei->{opt} and decides once whether to colorize
+	my ($cls, $lei) = @_;
+	my $self = bless { %{$lei->{opt}}, -colored => \&uncolored }, $cls;
+	return $self unless $self->{color} //= -t $lei->{1}; # explicit opt wins
+	my $cmd = [ qw(git config -z --includes -l) ]; # NUL-delimited listing
+	my ($r, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2} });
+	my $cfg = PublicInbox::Config::config_fh_parse($r, "\0", "\n"); # [1]
+	waitpid($pid, 0); # sets $?, checked below
+	if ($?) { # stay uncolored rather than failing the whole command
+		$lei->err("# git-config failed, no color (non-fatal)");
+		return $self;
+	}
+	$self->{-colored} = \&my_colored;
+	$self->{-gitcfg} = $cfg; # fallback source for color.diff.* slots
+	$self->{-leicfg} = $lei->{cfg}; # consulted before -gitcfg
+	$self;
+} # [1] NOTE(review): this file never loads PublicInbox::Config itself
+
+sub hdr_buf ($$) { # appends the escaped header block of $eml to obuf
+	my ($self, $eml) = @_;
+	my $hbuf = '';
+	for my $f (qw(From To Cc)) { # address headers get folded
+		for my $v ($eml->header($f)) {
+			next if $v !~ /\S/; # skip blank address headers
+			PublicInbox::View::fold_addresses($v);
+			_xs($v);
+			$hbuf .= "$f: $v\n";
+		}
+	}
+	for my $f (qw(Subject Date Newsgroups Message-ID X-Message-ID)) {
+		for my $v ($eml->header($f)) {
+			_xs($v);
+			$hbuf .= "$f: $v\n";
+		}
+	}
+	if (my @irt = $eml->header_raw('In-Reply-To')) {
+		for my $v (@irt) {
+			_xs($v);
+			$hbuf .= "In-Reply-To: $v\n";
+		}
+	} else { # no In-Reply-To header: derive it from references()
+		my $refs = references($eml);
+		if (defined(my $irt = pop @$refs)) { # last reference
+			_xs($irt);
+			$hbuf .= "In-Reply-To: <$irt>\n";
+		}
+		if (@$refs) {
+			_xs($_) for @$refs; # in-place; its return is NOT the string
+			$hbuf .= 'References: ' .
+				join("\n\t", map { "<$_>" } @$refs) .
+				"\n";
+		}
+	}
+	$self->{-colored}->($self, 'hdrdefault', $hbuf .= "\n");
+}
+
+sub attach_note ($$$$;$) { # appends a "[-- Attachment ... --]" banner to obuf
+	my ($self, $ct, $p, $fn, $err) = @_;
+	my ($part, $depth, $idx) = @$p; # $depth is not used in this sub
+	my $obuf = $self->{obuf};
+	my $nl = $idx eq '1' ? '' : "\n"; # NOTE(review): $nl is never used
+	$$obuf .= <<EOF if $err;
+[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
+EOF
+	my $blob = $self->{-smsg}->{blob} // '';
+	$blob .= ':' if $blob ne ''; # gives "$blob:$idx" in the banner
+	$$obuf .= "[-- Attachment $blob$idx ";
+	_xs($ct);
+	my $size = length($part->body);
+	my $ts = "Type: $ct, Size: $size bytes";
+	my $d = $part->header('Content-Description') // $fn // ''; # else ''
+	_xs($d);
+	$$obuf .= $d eq '' ? "$ts --]\n" : "$d --]\n[-- $ts --]\n";
+	hdr_buf($self, $part) if $part->{is_submsg}; # sub-message headers
+}
+
+sub flush_text_diff ($$) { # appends $$cur to obuf, coloring diff pieces
+	my ($self, $cur) = @_;
+	my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $$cur);
+	undef $$cur; # free memory
+	my $dctx; # true while inside a diff, undef again after "-- "
+	my $obuf = $self->{obuf};
+	my $colored = $self->{-colored};
+	while (defined(my $x = shift @top)) {
+		if (scalar(@top) >= 4 &&
+				$top[1] =~ $PublicInbox::ViewDiff::IS_OID &&
+				$top[0] =~ $PublicInbox::ViewDiff::IS_OID) {
+			splice(@top, 0, 4); # drop the 4 sub-captures after $x
+			$dctx = 1;
+			$colored->($self, 'meta', $x); # $x is the diff header
+		} elsif ($dctx) {
+			# Quiet "Complex regular subexpression recursion limit"
+			# warning.  Perl will truncate matches upon hitting
+			# that limit, giving us more (and shorter) scalars than
+			# would be ideal, but otherwise it's harmless.
+			#
+			# We could replace the `+' metacharacter with `{1,100}'
+			# to limit the matches ourselves to 100, but we can
+			# let Perl do it for us, quietly.
+			no warnings 'regexp';
+
+			for my $s (split(/((?:(?:^\+[^\n]*\n)+)|
+					(?:(?:^-[^\n]*\n)+)|
+					(?:^@@ [^\n]+\n))/xsm, $x)) {
+				if (!defined($dctx)) { # signature already seen
+					${$self->{obuf}} .= $s;
+				} elsif ($s =~ s/\A(@@ \S+ \S+ @@\s*)//) {
+					$colored->($self, 'frag', $1);
+					$colored->($self, 'func', $s); # rest of line
+				} elsif ($s =~ /\A\+/) {
+					$colored->($self, 'new', $s);
+				} elsif ($s =~ /\A-- $/sm) { # email sig starts
+					$dctx = undef;
+					${$self->{obuf}} .= $s;
+				} elsif ($s =~ /\A-/) {
+					$colored->($self, 'old', $s);
+				} else {
+					$colored->($self, 'context', $s);
+				}
+			}
+		} else { # text before the first diff header: append as-is
+			${$self->{obuf}} .= $x;
+		}
+	}
+}
+
+sub add_text_buf { # callback for Eml->each_part
+	my ($p, $self) = @_; # $self is the extra arg passed to each_part
+	my ($part, $depth, $idx) = @$p;
+	my $ct = $part->content_type || 'text/plain';
+	my $fn = $part->filename;
+	my ($s, $err) = msg_part_text($part, $ct);
+	return attach_note($self, $ct, $p, $fn) unless defined $s; # no text
+	hdr_buf($self, $part) if $part->{is_submsg};
+	$s =~ s/\r\n/\n/sg; # normalize CRLF to LF
+	_xs($s);
+	$s .= "\n" unless substr($s, -1, 1) eq "\n";
+	my $diff = ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms); # has a hunk?
+	my @sections = PublicInbox::MsgIter::split_quotes($s);
+	undef $s; # free memory
+	if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
+		# badly-encoded message with $err? tell the world about it!
+		attach_note($self, $ct, $p, $fn, $err);
+		${$self->{obuf}} .= "\n";
+	}
+	my $colored = $self->{-colored};
+	for my $cur (@sections) {
+		if ($cur =~ /\A>/) { # section starts with ">": quoted text
+			$colored->($self, 'quoted', $cur);
+		} elsif ($diff) {
+			flush_text_diff($self, \$cur);
+		} else {
+			${$self->{obuf}} .= $cur;
+		}
+		undef $cur; # free memory
+	}
+}
+
+# returns a scalar ref to the rendered text; deref for $lei->out or print
+sub eml_to_text {
+	my ($self, $smsg, $eml) = @_;
+	local $Term::ANSIColor::EACHLINE = "\n"; # colored() works per line
+	$self->{obuf} = \(my $obuf = ''); # fresh output buffer per message
+	$self->{-smsg} = $smsg; # read by attach_note for the blob OID
+	$self->{-max_cols} = ($self->{columns} //= 80) - 8; # for header wrap
+	my @h = ();
+	for my $f (qw(blob pct)) {
+		push @h, "$f:$smsg->{$f}" if defined $smsg->{$f};
+	}
+	@h = ("# @h\n") if @h; # one "# blob:... pct:..." status line
+	for my $f (qw(kw L)) {
+		my $v = $smsg->{$f} or next;
+		push @h, "# $f:".join(',', @$v)."\n" if @$v;
+	}
+	$self->{-colored}->($self, 'status', join('', @h));
+	hdr_buf($self, $eml);
+	$eml->each_part(\&add_text_buf, $self, 1);
+	delete $self->{obuf}; # delete returns the removed scalar ref
+}
+
+1;
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 8fe7261f..e9a7bf69 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -30,7 +30,7 @@ my $DIFFSTAT_COMMENT =
 my $NULL_TO_BLOB = qr/^(index $OID_NULL\.\.)($OID_BLOB)\b/ms;
 my $BLOB_TO_NULL = qr/^index ($OID_BLOB)(\.\.$OID_NULL)\b/ms;
 my $BLOB_TO_BLOB = qr/^index ($OID_BLOB)\.\.($OID_BLOB)/ms;
-my $EXTRACT_DIFFS = qr/(
+our $EXTRACT_DIFFS = qr/(
 		(?:	# begin header stuff, don't capture filenames, here,
 			# but instead wait for the --- and +++ lines.
 			(?:^diff\x20--git\x20$FN\x20$FN$LF)
@@ -41,7 +41,7 @@ my $EXTRACT_DIFFS = qr/(
 		^index\x20($OID_BLOB)\.\.($OID_BLOB)$ANY*$LF
 		^---\x20($FN)$LF
 		^\+{3}\x20($FN)$LF)/msx;
-my $IS_OID = qr/\A$OID_BLOB\z/s;
+our $IS_OID = qr/\A$OID_BLOB\z/s;
 
 # link to line numbers in blobs
 sub diff_hunk ($$$$) {

      parent reply	other threads:[~2021-04-27 11:07 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-27 11:07 [PATCH 0/5] lei lcat - local cat (not lolcat :P) Eric Wong
2021-04-27 11:07 ` [PATCH 1/5] lei: add "ls-sync" command for listing sync folders Eric Wong
2021-04-27 11:07 ` [PATCH 2/5] lei blob: support retrieving attachments via $OID:$IDX Eric Wong
2021-04-27 11:07 ` [PATCH 3/5] lei: standardize on _lei_wq_eof callback for workers Eric Wong
2021-04-27 11:07 ` [PATCH 4/5] lei lcat: extract Message-IDs from URLs and show them Eric Wong
2021-04-27 11:07 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210427110753.24609-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).