From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [RFC] www: set "<!DOCTYPE html>" everywhere
Date: Sat, 27 Apr 2019 21:23:34 +0000 [thread overview]
Message-ID: <20190427212334.uhc2z4ju6tivnrbl@whir> (raw)
I'm no fan of the "Living Standard" quicksand that is HTML 5
(or of wasting 15 bytes on every response). However, being able
to validate everything easily with tidy(1) seems alright...
t/check-www-inbox.perl now runs tidy(1) for every text/html
response.
---
Documentation/txt2pre | 2 +-
lib/PublicInbox/Mbox.pm | 2 +-
lib/PublicInbox/Unsubscribe.pm | 2 +-
lib/PublicInbox/WWW.pm | 2 +-
lib/PublicInbox/WwwStream.pm | 2 +-
t/check-www-inbox.perl | 76 ++++++++++++++++++++--------------
6 files changed, 50 insertions(+), 36 deletions(-)
diff --git a/Documentation/txt2pre b/Documentation/txt2pre
index 4c4b2ca..4ad2372 100755
--- a/Documentation/txt2pre
+++ b/Documentation/txt2pre
@@ -19,7 +19,7 @@ $str = $l->linkify_1($str);
$str = ascii_html($str);
$str = $l->linkify_2($str);
-print '<html><head>',
+print '<!DOCTYPE html><html><head>',
qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>),
"<title>$title</title>",
"</head><body><pre>", $str , '</pre></body></html>';
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 78dbe27..1e85573 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -222,7 +222,7 @@ sub need_gzip {
my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]);
my $title = 'gzipped mbox not available';
$fh->write(<<EOF);
-<html><head><title>$title</title><body><pre>$title
+<!DOCTYPE html><html><head><title>$title</title><body><pre>$title
The administrator needs to install the IO::Compress::Gzip Perl module
to support gzipped mboxes.
<a href="../">Return to index</a></pre></body></html>
diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm
index 11a347e..1e66011 100644
--- a/lib/PublicInbox/Unsubscribe.pm
+++ b/lib/PublicInbox/Unsubscribe.pm
@@ -134,7 +134,7 @@ sub finalize_unsub { # on POST
sub r {
my ($self, $code, $title, @body) = @_;
- [ $code, [ @CT_HTML ], [
+ [ $code, [ @CT_HTML ], [ '<!DOCTYPE html>' .
"<html><head><title>$title</title></head><body><pre>".
join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
"<pre>This page is available under AGPL-3.0+\n" .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6e69001..6e46caa 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -314,7 +314,7 @@ sub searcher {
sub need_search {
my ($ctx) = @_;
my $msg = <<EOF;
-<html><head><title>Search not available for this
+<!DOCTYPE html><html><head><title>Search not available for this
public-inbox</title><body><pre>Search is not available for this public-inbox
<a href="../">Return to index</a></pre></body></html>
EOF
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 8b79923..811f6bc 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -64,7 +64,7 @@ sub _html_top ($) {
} else {
$top = '<pre>' . $top . "\n" . $links . '</pre>';
}
- "<html><head><title>$title</title>" .
+ "<!DOCTYPE html><html><head><title>$title</title>" .
"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
"href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
$ctx->{www}->style($upfx) .
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index db292c5..0cab68e 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -30,37 +30,14 @@ my %opts = (
GetOptions(%opts) or die "bad command-line args\n$usage";
my $root_url = shift or die $usage;
-chomp(my $xmlstarlet = which('xmlstarlet'));
-my $atom_check = eval {
- my $cmd = [ qw(xmlstarlet val -e -) ];
- sub {
- my ($in, $out, $err) = @_;
- use autodie;
- open my $in_fh, '+>', undef;
- open my $out_fh, '+>', undef;
- open my $err_fh, '+>', undef;
- print $in_fh $$in;
- $in_fh->flush;
- sysseek($in_fh, 0, 0);
- my $rdr = {
- 0 => fileno($in_fh),
- 1 => fileno($out_fh),
- 2 => fileno($err_fh),
- };
- my $pid = spawn($cmd, undef, $rdr);
- defined $pid or die "spawn failure: $!";
- while (waitpid($pid, 0) != $pid) {
- next if $!{EINTR};
- warn "waitpid(xmlstarlet, $pid) $!";
- return $!;
- }
- sysseek($out_fh, 0, 0);
- sysread($out_fh, $$out, -s $out_fh);
- sysseek($err_fh, 0, 0);
- sysread($err_fh, $$err, -s $err_fh);
- $?
- }
-} if $xmlstarlet;
+# Each validator is a closure from cmd_check(), or undef when which()
+# does not find the tool.  A ternary is used instead of "my ... if ...":
+# a "my" declaration under a statement modifier is undefined behaviour.
+my $xmlstarlet = which('xmlstarlet');
+my $atom_check = $xmlstarlet ?
+	cmd_check([ $xmlstarlet, qw(val -e -) ]) : undef;
+
+# FIXME: highlight creates empty spans:
+my @TIDY_OPT = qw(--drop-empty-elements 0);
+
+my $tidy = which('tidy');
+my $tidy_check = $tidy ?
+	cmd_check([ $tidy, qw(-e -q), @TIDY_OPT ]) : undef;
my %workers;
$SIG{INT} = sub { exit 130 };
@@ -205,5 +182,42 @@ sub worker_loop {
my $c = Dumper($o);
warn "bad: $u $c\n";
}
+ if ($tidy_check) {
+ my $raw = $r->decoded_content;
+ my ($out, $err) = ('', '');
+ my $fail = $tidy_check->(\$raw, \$out, \$err);
+ warn "Tidy ($fail) - $u - <1:$out> <2:$err>\n" if $fail;
+ }
+ }
+}
+
+# Build a checker closure around the command line given as an array ref.
+#
+# The returned closure takes three scalar refs ($in, $out, $err):
+# $$in is written to a temporary file which becomes the command's stdin,
+# while $$out and $$err are filled with whatever the command wrote to
+# its stdout and stderr.  It returns the wait status ($?) of the
+# command, or the errno value if waitpid fails with anything other
+# than EINTR.  It dies on spawn failure or on any I/O error.
+sub cmd_check {
+	my ($cmd) = @_;
+	sub {
+		my ($in, $out, $err) = @_;
+		use autodie;
+		# anonymous temporary files back all three standard FDs
+		open my $in_fh, '+>', undef;
+		open my $out_fh, '+>', undef;
+		open my $err_fh, '+>', undef;
+		# autodie cannot wrap print or the flush method, so check
+		# them by hand rather than hand the child truncated input
+		print $in_fh $$in or die "print failed: $!";
+		$in_fh->flush or die "flush failed: $!";
+		sysseek($in_fh, 0, 0);
+		my $rdr = {
+			0 => fileno($in_fh),
+			1 => fileno($out_fh),
+			2 => fileno($err_fh),
+		};
+		my $pid = spawn($cmd, undef, $rdr);
+		defined $pid or die "spawn failure: $!";
+		while (waitpid($pid, 0) != $pid) {
+			next if $!{EINTR};
+			my $errno = $!; # warn may clobber $!
+			warn "waitpid($cmd->[0], $pid) $errno";
+			return $errno;
+		}
+		# the child shares our file offsets, rewind before reading
+		sysseek($out_fh, 0, 0);
+		sysread($out_fh, $$out, -s $out_fh);
+		sysseek($err_fh, 0, 0);
+		sysread($err_fh, $$err, -s $err_fh);
+		$?
+	}
+}
--
EW
next reply other threads:[~2019-04-27 21:23 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-27 21:23 Eric Wong [this message]
2019-04-28 22:32 ` [RFC] www: set "<!DOCTYPE html>" everywhere Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190427212334.uhc2z4ju6tivnrbl@whir \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).