unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [RFC] www: set "<!DOCTYPE html>" everywhere
Date: Sat, 27 Apr 2019 21:23:34 +0000	[thread overview]
Message-ID: <20190427212334.uhc2z4ju6tivnrbl@whir> (raw)

I'm no fan of the "Living Standard" quicksand that is HTML 5
(or of wasting 15 bytes on every response).  However, being able
to easily validate everything with tidy(1) seems alright...

t/check-www-inbox.perl now runs tidy(1) on every text/html
response.
---
 Documentation/txt2pre          |  2 +-
 lib/PublicInbox/Mbox.pm        |  2 +-
 lib/PublicInbox/Unsubscribe.pm |  2 +-
 lib/PublicInbox/WWW.pm         |  2 +-
 lib/PublicInbox/WwwStream.pm   |  2 +-
 t/check-www-inbox.perl         | 76 ++++++++++++++++++++--------------
 6 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/Documentation/txt2pre b/Documentation/txt2pre
index 4c4b2ca..4ad2372 100755
--- a/Documentation/txt2pre
+++ b/Documentation/txt2pre
@@ -19,7 +19,7 @@ $str = $l->linkify_1($str);
 $str = ascii_html($str);
 $str = $l->linkify_2($str);
 
-print '<html><head>',
+print '<!DOCTYPE html><html><head>',
   qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>),
   "<title>$title</title>",
   "</head><body><pre>",  $str , '</pre></body></html>';
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 78dbe27..1e85573 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -222,7 +222,7 @@ sub need_gzip {
 	my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]);
 	my $title = 'gzipped mbox not available';
 	$fh->write(<<EOF);
-<html><head><title>$title</title><body><pre>$title
+<!DOCTYPE html><html><head><title>$title</title><body><pre>$title
 The administrator needs to install the IO::Compress::Gzip Perl module
 to support gzipped mboxes.
 <a href="../">Return to index</a></pre></body></html>
diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm
index 11a347e..1e66011 100644
--- a/lib/PublicInbox/Unsubscribe.pm
+++ b/lib/PublicInbox/Unsubscribe.pm
@@ -134,7 +134,7 @@ sub finalize_unsub { # on POST
 
 sub r {
 	my ($self, $code, $title, @body) = @_;
-	[ $code, [ @CT_HTML ], [
+	[ $code, [ @CT_HTML ], [ '<!DOCTYPE html>' .
 		"<html><head><title>$title</title></head><body><pre>".
 		join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'.
 		"<pre>This page is available under AGPL-3.0+\n" .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6e69001..6e46caa 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -314,7 +314,7 @@ sub searcher {
 sub need_search {
 	my ($ctx) = @_;
 	my $msg = <<EOF;
-<html><head><title>Search not available for this
+<!DOCTYPE html><html><head><title>Search not available for this
 public-inbox</title><body><pre>Search is not available for this public-inbox
 <a href="../">Return to index</a></pre></body></html>
 EOF
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 8b79923..811f6bc 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -64,7 +64,7 @@ sub _html_top ($) {
 	} else {
 		$top = '<pre>' . $top . "\n" . $links . '</pre>';
 	}
-	"<html><head><title>$title</title>" .
+	"<!DOCTYPE html><html><head><title>$title</title>" .
 		"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
 		"href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
 	        $ctx->{www}->style($upfx) .
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index db292c5..0cab68e 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -30,37 +30,14 @@ my %opts = (
 GetOptions(%opts) or die "bad command-line args\n$usage";
 my $root_url = shift or die $usage;
 
-chomp(my $xmlstarlet = which('xmlstarlet'));
-my $atom_check = eval {
-	my $cmd = [ qw(xmlstarlet val -e -) ];
-	sub {
-		my ($in, $out, $err) = @_;
-		use autodie;
-		open my $in_fh, '+>', undef;
-		open my $out_fh, '+>', undef;
-		open my $err_fh, '+>', undef;
-		print $in_fh $$in;
-		$in_fh->flush;
-		sysseek($in_fh, 0, 0);
-		my $rdr = {
-			0 => fileno($in_fh),
-			1 => fileno($out_fh),
-			2 => fileno($err_fh),
-		};
-		my $pid = spawn($cmd, undef, $rdr);
-		defined $pid or die "spawn failure: $!";
-		while (waitpid($pid, 0) != $pid) {
-			next if $!{EINTR};
-			warn "waitpid(xmlstarlet, $pid) $!";
-			return $!;
-		}
-		sysseek($out_fh, 0, 0);
-		sysread($out_fh, $$out, -s $out_fh);
-		sysseek($err_fh, 0, 0);
-		sysread($err_fh, $$err, -s $err_fh);
-		$?
-	}
-} if $xmlstarlet;
+my $xmlstarlet = which('xmlstarlet');
+my $atom_check = cmd_check([ $xmlstarlet, qw(val -e -) ]) if $xmlstarlet;
+
+# FIXME: highlight creates empty spans:
+my @TIDY_OPT = qw(--drop-empty-elements 0);
+
+my $tidy = which('tidy');
+my $tidy_check = cmd_check([ $tidy, qw(-e -q), @TIDY_OPT ]) if $tidy;
 
 my %workers;
 $SIG{INT} = sub { exit 130 };
@@ -205,5 +182,42 @@ sub worker_loop {
 			my $c = Dumper($o);
 			warn "bad: $u $c\n";
 		}
+		if ($tidy_check) {
+			my $raw = $r->decoded_content;
+			my ($out, $err) = ('', '');
+			my $fail = $tidy_check->(\$raw, \$out, \$err);
+			warn "Tidy ($fail) - $u - <1:$out> <2:$err>\n" if $fail;
+		}
+	}
+}
+
+sub cmd_check {
+	my ($cmd) = @_;
+	sub {
+		my ($in, $out, $err) = @_;
+		use autodie;
+		open my $in_fh, '+>', undef;
+		open my $out_fh, '+>', undef;
+		open my $err_fh, '+>', undef;
+		print $in_fh $$in;
+		$in_fh->flush;
+		sysseek($in_fh, 0, 0);
+		my $rdr = {
+			0 => fileno($in_fh),
+			1 => fileno($out_fh),
+			2 => fileno($err_fh),
+		};
+		my $pid = spawn($cmd, undef, $rdr);
+		defined $pid or die "spawn failure: $!";
+		while (waitpid($pid, 0) != $pid) {
+			next if $!{EINTR};
+			warn "waitpid($cmd->[0], $pid) $!";
+			return $!;
+		}
+		sysseek($out_fh, 0, 0);
+		sysread($out_fh, $$out, -s $out_fh);
+		sysseek($err_fh, 0, 0);
+		sysread($err_fh, $$err, -s $err_fh);
+		$?
 	}
 }
-- 
EW

             reply	other threads:[~2019-04-27 21:23 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-27 21:23 Eric Wong [this message]
2019-04-28 22:32 ` [RFC] www: set "<!DOCTYPE html>" everywhere Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190427212334.uhc2z4ju6tivnrbl@whir \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).