From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] nntp: do not double-encode UTF-8 body
Date: Tue, 14 Jun 2016 06:57:29 +0000 [thread overview]
Message-ID: <20160614065729.2449-1-e@80x24.org> (raw)
Or whatever the appropriate Perl terminology is...
And we will need to do something appropriate for other
encodings, too. I still barely understand Perl Unicode
despite attempting to understand the docs over the years.
---
lib/PublicInbox/NNTP.pm | 17 ++++++++++++-----
t/nntpd.t | 2 ++
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index e4e3de4..e868321 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -512,6 +512,12 @@ sub set_art {
$self->{article} = $art if defined $art && $art =~ /\A\d+\z/;
}
+sub _header ($) {
+ my $hdr = $_[0]->header_obj->as_string;
+ utf8::encode($hdr);
+ $hdr
+}
+
sub cmd_article ($;$) {
my ($self, $art) = @_;
my $r = art_lookup($self, $art, 1);
@@ -519,7 +525,7 @@ sub cmd_article ($;$) {
my ($n, $mid, $s) = @$r;
set_art($self, $art);
more($self, "220 $n <$mid> article retrieved - head and body follow");
- do_more($self, $s->header_obj->as_string);
+ do_more($self, _header($s));
do_more($self, "\r\n");
simple_body_write($self, $s);
}
@@ -531,7 +537,7 @@ sub cmd_head ($;$) {
my ($n, $mid, $s) = @$r;
set_art($self, $art);
more($self, "221 $n <$mid> article retrieved - head follows");
- do_more($self, $s->header_obj->as_string);
+ do_more($self, _header($s));
'.'
}
@@ -738,6 +744,7 @@ sub hdr_searchmsg ($$$$) {
foreach my $s (@$msgs) {
$tmp .= $s->num . ' ' . $s->$field . "\r\n";
}
+ utf8::encode($tmp);
do_more($self, $tmp);
# -1 to adjust for implicit increment in long_response
$$i = $nr ? $$i + $nr - 1 : long_response_limit;
@@ -826,7 +833,7 @@ sub over_line ($$) {
my ($num, $smsg) = @_;
# n.b. field access and procedural calls can be
# 10%-15% faster than OO method calls:
- join("\t", $num,
+ my $s = join("\t", $num,
$smsg->{subject},
$smsg->{from},
PublicInbox::SearchMsg::date($smsg),
@@ -834,6 +841,8 @@ sub over_line ($$) {
$smsg->{references},
PublicInbox::SearchMsg::bytes($smsg),
PublicInbox::SearchMsg::lines($smsg));
+ utf8::encode($s);
+ $s
}
sub cmd_over ($;$) {
@@ -896,7 +905,6 @@ sub cmd_xpath ($$) {
sub res ($$) {
my ($self, $line) = @_;
- utf8::encode($line);
do_write($self, $line . "\r\n");
}
@@ -931,7 +939,6 @@ use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0;
sub do_more ($$) {
my ($self, $data) = @_;
- utf8::encode($data);
if (MSG_MORE && !$self->{write_buf_size}) {
my $n = send($self->{sock}, $data, MSG_MORE);
if (defined $n) {
diff --git a/t/nntpd.t b/t/nntpd.t
index 5f4ba57..5875b73 100644
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@ -175,6 +175,8 @@ EOF
is_deeply($n->head(1), $n->head('<nntp@example.com>'), 'HEAD OK');
is_deeply($n->body(1), $n->body('<nntp@example.com>'), 'BODY OK');
+ is($n->body(1)->[0], "This is a test message for El\xc3\xa9anor\n",
+ 'body really matches');
my $art = $n->article(1);
is(ref($art), 'ARRAY', 'got array for ARTICLE');
is_deeply($art, $n->article('<nntp@example.com>'), 'ARTICLE OK');
reply other threads:[~2016-06-14 6:57 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160614065729.2449-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).