commit 81f61a615ff8c5c5d6e270c255c15eb164f3456c Author: Andy Wingo Date: Wed Oct 15 11:49:41 2014 +0200 web: Location header is URI-reference; better URI-reference support * module/web/uri.scm (validate-uri): Add reference? keyword argument, for validating references. (build-uri): Clarify comments to indicate that the result is an absolute URI. (build-uri-reference): New interface, to build URI-references. (string->uri-reference): Rename from string->uri*. Fix fragment parsing to not include the #. (string->uri): Adapt to string->uri-reference name change. * module/web/request.scm (request-absolute-uri): Add default-scheme optional argument. Use it if the request-uri has no scheme, or error. * module/web/http.scm (write-uri): Reflow to use "when". Fix writing of URI-reference instances. (declare-uri-reference-header!): Rename from declare-relative-uri-header!. Use string->uri-reference. ("Location"): Declare as a URI-reference header, as per RFC 7231. * module/web/client.scm (open-socket-for-uri): Handle the case in which there is no URI scheme. * test-suite/tests/web-http.test: * test-suite/tests/web-uri.test: Add tests. diff --git a/module/web/client.scm b/module/web/client.scm index 3f6c45b..ef2314b 100644 --- a/module/web/client.scm +++ b/module/web/client.scm @@ -1,6 +1,6 @@ ;;; Web client -;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. +;; Copyright (C) 2011, 2012, 2013, 2014 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -74,7 +74,8 @@ (delete-duplicates (getaddrinfo (uri-host uri) (cond (port => number->string) - (else (symbol->string (uri-scheme uri)))) + ((uri-scheme uri) => symbol->string) + (else (error "Not an absolute URI" uri))) (if port AI_NUMERICSERV 0)) diff --git a/module/web/http.scm b/module/web/http.scm index aa75142..a157cf0 100644 --- a/module/web/http.scm +++ b/module/web/http.scm @@ -1090,20 +1090,19 @@ three values: the method, the URI, and the version." (bad-request "Bad Request-Line: ~s" line)))) (define (write-uri uri port) - (if (uri-host uri) - (begin - (display (uri-scheme uri) port) - (display "://" port) - (if (uri-userinfo uri) - (begin - (display (uri-userinfo uri) port) - (display #\@ port))) - (display (uri-host uri) port) - (let ((p (uri-port uri))) - (if (and p (not (eqv? p 80))) - (begin - (display #\: port) - (display p port)))))) + (when (uri-host uri) + (when (uri-scheme uri) + (display (uri-scheme uri) port) + (display #\: port)) + (display "//" port) + (when (uri-userinfo uri) + (display (uri-userinfo uri) port) + (display #\@ port)) + (display (uri-host uri) port) + (let ((p (uri-port uri))) + (when (and p (not (eqv? p 80))) + (display #\: port) + (display p port)))) (let* ((path (uri-path uri)) (len (string-length path))) (cond @@ -1113,10 +1112,9 @@ three values: the method, the URI, and the version." (bad-request "Empty path and no host for URI: ~s" uri)) (else (display path port)))) - (if (uri-query uri) - (begin - (display #\? port) - (display (uri-query uri) port)))) + (when (uri-query uri) + (display #\? port) + (display (uri-query uri) port))) (define (write-request-line method uri version port) "Write the first line of an HTTP request to PORT." @@ -1226,11 +1224,11 @@ treated specially, and is just returned as a plain string." (@@ (web uri) absolute-uri?) write-uri)) -;; emacs: (put 'declare-relative-uri-header! 'scheme-indent-function 1) -(define (declare-relative-uri-header! name) +;; emacs: (put 'declare-uri-reference-header! 'scheme-indent-function 1) +(define (declare-uri-reference-header! name) (declare-header! name (lambda (str) - (or ((@@ (web uri) string->uri*) str) + (or (string->uri-reference str) (bad-header-component 'uri str))) uri? write-uri)) @@ -1519,9 +1517,9 @@ treated specially, and is just returned as a plain string." ;; (declare-integer-header! "Content-Length") -;; Content-Location = ( absoluteURI | relativeURI ) +;; Content-Location = URI-reference ;; -(declare-relative-uri-header! "Content-Location") +(declare-uri-reference-header! "Content-Location") ;; Content-MD5 = ;; @@ -1822,9 +1820,9 @@ treated specially, and is just returned as a plain string." (display (cdr pair) port))) ","))) -;; Referer = ( absoluteURI | relativeURI ) +;; Referer = URI-reference ;; -(declare-relative-uri-header! "Referer") +(declare-uri-reference-header! "Referer") ;; TE = #( t-codings ) ;; t-codings = "trailers" | ( transfer-extension [ accept-params ] ) @@ -1859,9 +1857,13 @@ treated specially, and is just returned as a plain string." entity-tag? write-entity-tag) -;; Location = absoluteURI +;; Location = URI-reference +;; +;; In RFC 2616, Location was specified as being an absolute URI. This +;; was changed in RFC 7231 to permit URI references generally, which +;; matches web reality. ;; -(declare-uri-header! "Location") +(declare-uri-reference-header! "Location") ;; Proxy-Authenticate = 1#challenge ;; diff --git a/module/web/request.scm b/module/web/request.scm index 7ced076..0a206cf 100644 --- a/module/web/request.scm +++ b/module/web/request.scm @@ -300,7 +300,8 @@ request R." (define-request-accessor user-agent #f) ;; Misc accessors -(define* (request-absolute-uri r #:optional default-host default-port) +(define* (request-absolute-uri r #:optional default-host default-port + default-scheme) "A helper routine to determine the absolute URI of a request, using the ‘host’ header and the default host and port." (let ((uri (request-uri r))) @@ -313,7 +314,10 @@ request R." (bad-request "URI not absolute, no Host header, and no default: ~s" uri))))) - (build-uri (uri-scheme uri) + (build-uri (or (uri-scheme uri) + default-scheme + (bad-request "URI not absolute and no default-port" + uri)) #:host (car host) #:port (cdr host) #:path (uri-path uri) diff --git a/module/web/uri.scm b/module/web/uri.scm index 3ab820d..063d7ee 100644 --- a/module/web/uri.scm +++ b/module/web/uri.scm @@ -36,8 +36,10 @@ uri-path uri-query uri-fragment build-uri + build-uri-reference declare-default-port! - string->uri uri->string + string->uri string->uri-reference + uri->string uri-decode uri-encode split-and-decode-uri-path encode-and-join-uri-path)) @@ -62,9 +64,10 @@ (define (positive-exact-integer? port) (and (number? port) (exact? port) (integer? port) (positive? port))) -(define (validate-uri scheme userinfo host port path query fragment) +(define* (validate-uri scheme userinfo host port path query fragment + #:key reference?) (cond - ((not (symbol? scheme)) + ((and (not reference?) (not (symbol? scheme))) (uri-error "Expected a symbol for the URI scheme: ~s" scheme)) ((and (or userinfo port) (not host)) (uri-error "Expected a host, given userinfo or port")) @@ -82,15 +85,26 @@ (define* (build-uri scheme #:key userinfo host port (path "") query fragment (validate? #t)) - "Construct a URI object. SCHEME should be a symbol, PORT -either a positive, exact integer or ‘#f’, and the rest of the -fields are either strings or ‘#f’. If VALIDATE? is true, -also run some consistency checks to make sure that the constructed URI -is valid." + "Construct a URI object. SCHEME should be a symbol, PORT either a +positive, exact integer or ‘#f’, and the rest of the fields are either +strings or ‘#f’. If VALIDATE? is true, also run some consistency checks +to make sure that the constructed object is a valid absolute URI." (if validate? (validate-uri scheme userinfo host port path query fragment)) (make-uri scheme userinfo host port path query fragment)) +(define* (build-uri-reference #:key scheme userinfo host port (path "") query + fragment (validate? #t)) + "Construct a URI object. SCHEME should be a symbol or ‘#f’, PORT +either a positive, exact integer or ‘#f’, and the rest obf the fields +are either strings or ‘#f’. If VALIDATE? is true, also run some +consistency checks to make sure that the constructed URI is a valid URI +reference (either an absolute URI or a relative reference)." + (if validate? + (validate-uri scheme userinfo host port path query fragment + #:reference? #t)) + (make-uri scheme userinfo host port path query fragment)) + ;; See RFC 3986 #3.2.2 for comments on percent-encodings, IDNA (RFC ;; 3490), and non-ASCII host names. ;; @@ -156,6 +170,10 @@ is valid." ;;; / path-absolute ;;; / path-rootless ;;; / path-empty +;;; +;;; A URI-reference is the same as URI, but where the scheme is +;;; optional. If the scheme is not present, its colon isn't present +;;; either. (define scheme-pat "[a-zA-Z][a-zA-Z0-9+.-]*") @@ -173,9 +191,9 @@ is valid." (define uri-regexp (make-regexp uri-pat)) -(define (string->uri* string) - "Parse STRING into a URI object. Return ‘#f’ if the string -could not be parsed." +(define (string->uri-reference string) + "Parse the URI reference written as STRING into a URI object. Return +‘#f’ if the string could not be parsed." (% (let ((m (regexp-exec uri-regexp string))) (if (not m) (abort)) (let ((scheme (let ((str (match:substring m 2))) @@ -183,7 +201,7 @@ could not be parsed." (authority (match:substring m 3)) (path (match:substring m 4)) (query (match:substring m 6)) - (fragment (match:substring m 7))) + (fragment (match:substring m 8))) (call-with-values (lambda () (if authority @@ -195,9 +213,9 @@ could not be parsed." #f))) (define (string->uri string) - "Parse STRING into a URI object. Return ‘#f’ if the string + "Parse STRING into an absolute URI object. Return ‘#f’ if the string could not be parsed." - (let ((uri (string->uri* string))) + (let ((uri (string->uri-reference string))) (and uri (uri-scheme uri) uri))) (define *default-ports* (make-hash-table)) diff --git a/test-suite/tests/web-http.test b/test-suite/tests/web-http.test index 45cce02..dfc9677 100644 --- a/test-suite/tests/web-http.test +++ b/test-suite/tests/web-http.test @@ -345,6 +345,14 @@ (pass-if-parse etag "W/\"foo\"" '("foo" . #f)) (pass-if-parse location "http://other-place" (build-uri 'http #:host "other-place")) + (pass-if-parse location "#foo" + (build-uri-reference #:fragment "foo")) + (pass-if-parse location "/#foo" + (build-uri-reference #:path "/" #:fragment "foo")) + (pass-if-parse location "/foo" + (build-uri-reference #:path "/foo")) + (pass-if-parse location "//server/foo" + (build-uri-reference #:host "server" #:path "/foo")) (pass-if-parse proxy-authenticate "Basic realm=\"guile\"" '((basic (realm . "guile")))) (pass-if-parse retry-after "Tue, 15 Nov 1994 08:12:31 GMT" diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test index 3d14d9d..4873d7f 100644 --- a/test-suite/tests/web-uri.test +++ b/test-suite/tests/web-uri.test @@ -1,6 +1,6 @@ ;;;; web-uri.test --- URI library -*- mode: scheme; coding: utf-8; -*- ;;;; -;;;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +;;;; Copyright (C) 2010, 2011, 2012, 2014 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -210,7 +210,298 @@ (pass-if "file:///etc/hosts" (uri=? (string->uri "file:///etc/hosts") #:scheme 'file - #:path "/etc/hosts"))) + #:path "/etc/hosts")) + + (pass-if "http://foo#bar" + (uri=? (string->uri "http://foo#bar") + #:scheme 'http + #:host "foo" + #:path "" + #:fragment "bar")) + + (pass-if "http://foo:/#bar" + (uri=? (string->uri "http://foo:/#bar") + #:scheme 'http + #:host "foo" + #:path "/" + #:fragment "bar")) + + (pass-if "http://foo:100#bar" + (uri=? (string->uri "http://foo:100#bar") + #:scheme 'http + #:host "foo" + #:port 100 + #:path "" + #:fragment "bar")) + + (pass-if "http://foo:100/#bar" + (uri=? (string->uri "http://foo:100/#bar") + #:scheme 'http + #:host "foo" + #:port 100 + #:path "/" + #:fragment "bar")) + + (pass-if "http://foo?q#bar" + (uri=? (string->uri "http://foo?q#bar") + #:scheme 'http + #:host "foo" + #:path "" + #:query "q" + #:fragment "bar")) + + (pass-if "http://foo:/?q#bar" + (uri=? (string->uri "http://foo:/?q#bar") + #:scheme 'http + #:host "foo" + #:path "/" + #:query "q" + #:fragment "bar")) + + (pass-if "http://foo:100?q#bar" + (uri=? (string->uri "http://foo:100?q#bar") + #:scheme 'http + #:host "foo" + #:port 100 + #:path "" + #:query "q" + #:fragment "bar")) + + (pass-if "http://foo:100/?q#bar" + (uri=? (string->uri "http://foo:100/?q#bar") + #:scheme 'http + #:host "foo" + #:port 100 + #:path "/" + #:query "q" + #:fragment "bar"))) + +(with-test-prefix "string->uri-reference" + (pass-if "/foo" + (uri=? (string->uri-reference "/foo") + #:path "/foo")) + + (pass-if "ftp:/foo" + (uri=? (string->uri-reference "ftp:/foo") + #:scheme 'ftp + #:path "/foo")) + + (pass-if "ftp:foo" + (uri=? (string->uri-reference "ftp:foo") + #:scheme 'ftp + #:path "foo")) + + (pass-if "//foo/bar" + (uri=? (string->uri-reference "//foo/bar") + #:host "foo" + #:path "/bar")) + + (pass-if "ftp://foo@bar:22/baz" + (uri=? (string->uri-reference "ftp://foo@bar:22/baz") + #:scheme 'ftp + #:userinfo "foo" + #:host "bar" + #:port 22 + #:path "/baz")) + + (pass-if "//foo@bar:22/baz" + (uri=? (string->uri-reference "//foo@bar:22/baz") + #:userinfo "foo" + #:host "bar" + #:port 22 + #:path "/baz")) + + (pass-if "http://bad.host.1" + (not (string->uri-reference "http://bad.host.1"))) + + (pass-if "//bad.host.1" + (not (string->uri-reference "//bad.host.1"))) + + (pass-if "http://1.good.host" + (uri=? (string->uri-reference "http://1.good.host") + #:scheme 'http #:host "1.good.host" #:path "")) + + (pass-if "//1.good.host" + (uri=? (string->uri-reference "//1.good.host") + #:host "1.good.host" #:path "")) + + (when (memq 'socket *features*) + (pass-if "http://192.0.2.1" + (uri=? (string->uri-reference "http://192.0.2.1") + #:scheme 'http #:host "192.0.2.1" #:path "")) + + (pass-if "//192.0.2.1" + (uri=? (string->uri-reference "//192.0.2.1") + #:host "192.0.2.1" #:path "")) + + (pass-if "http://[2001:db8::1]" + (uri=? (string->uri-reference "http://[2001:db8::1]") + #:scheme 'http #:host "2001:db8::1" #:path "")) + + (pass-if "//[2001:db8::1]" + (uri=? (string->uri-reference "//[2001:db8::1]") + #:host "2001:db8::1" #:path "")) + + (pass-if "http://[2001:db8::1]:80" + (uri=? (string->uri-reference "http://[2001:db8::1]:80") + #:scheme 'http + #:host "2001:db8::1" + #:port 80 + #:path "")) + + (pass-if "//[2001:db8::1]:80" + (uri=? (string->uri-reference "//[2001:db8::1]:80") + #:host "2001:db8::1" + #:port 80 + #:path "")) + + (pass-if "http://[::ffff:192.0.2.1]" + (uri=? (string->uri-reference "http://[::ffff:192.0.2.1]") + #:scheme 'http #:host "::ffff:192.0.2.1" #:path "")) + + (pass-if "//[::ffff:192.0.2.1]" + (uri=? (string->uri-reference "//[::ffff:192.0.2.1]") + #:host "::ffff:192.0.2.1" #:path ""))) + + (pass-if "http://foo:" + (uri=? (string->uri-reference "http://foo:") + #:scheme 'http #:host "foo" #:path "")) + + (pass-if "//foo:" + (uri=? (string->uri-reference "//foo:") + #:host "foo" #:path "")) + + (pass-if "http://foo:/" + (uri=? (string->uri-reference "http://foo:/") + #:scheme 'http #:host "foo" #:path "/")) + + (pass-if "//foo:/" + (uri=? (string->uri-reference "//foo:/") + #:host "foo" #:path "/")) + + (pass-if "http://2012.jsconf.us/" + (uri=? (string->uri-reference "http://2012.jsconf.us/") + #:scheme 'http #:host "2012.jsconf.us" #:path "/")) + + (pass-if "//2012.jsconf.us/" + (uri=? (string->uri-reference "//2012.jsconf.us/") + #:host "2012.jsconf.us" #:path "/")) + + (pass-if "http://foo:not-a-port" + (not (string->uri-reference "http://foo:not-a-port"))) + + (pass-if "//foo:not-a-port" + (not (string->uri-reference "//foo:not-a-port"))) + + (pass-if "http://:10" + (not (string->uri-reference "http://:10"))) + + (pass-if "//:10" + (not (string->uri-reference "//:10"))) + + (pass-if "http://foo@" + (not (string->uri-reference "http://foo@"))) + + (pass-if "//foo@" + (not (string->uri-reference "//foo@"))) + + (pass-if "file:/" + (uri=? (string->uri-reference "file:/") + #:scheme 'file + #:path "/")) + + (pass-if "/" + (uri=? (string->uri-reference "/") + #:path "/")) + + (pass-if "foo" + (uri=? (string->uri-reference "foo") + #:path "foo")) + + (pass-if "file:/etc/hosts" + (uri=? (string->uri-reference "file:/etc/hosts") + #:scheme 'file + #:path "/etc/hosts")) + + (pass-if "/etc/hosts" + (uri=? (string->uri-reference "/etc/hosts") + #:path "/etc/hosts")) + + (pass-if "file:///etc/hosts" + (uri=? (string->uri-reference "file:///etc/hosts") + #:scheme 'file + #:path "/etc/hosts")) + + (pass-if "///etc/hosts" + (uri=? (string->uri-reference "///etc/hosts") + #:path "/etc/hosts")) + + (pass-if "/foo#bar" + (uri=? (string->uri-reference "/foo#bar") + #:path "/foo" + #:fragment "bar")) + + (pass-if "//foo#bar" + (uri=? (string->uri-reference "//foo#bar") + #:host "foo" + #:path "" + #:fragment "bar")) + + (pass-if "//foo:/#bar" + (uri=? (string->uri-reference "//foo:/#bar") + #:host "foo" + #:path "/" + #:fragment "bar")) + + (pass-if "//foo:100#bar" + (uri=? (string->uri-reference "//foo:100#bar") + #:host "foo" + #:port 100 + #:path "" + #:fragment "bar")) + + (pass-if "//foo:100/#bar" + (uri=? (string->uri-reference "//foo:100/#bar") + #:host "foo" + #:port 100 + #:path "/" + #:fragment "bar")) + + (pass-if "/foo?q#bar" + (uri=? (string->uri-reference "/foo?q#bar") + #:path "/foo" + #:query "q" + #:fragment "bar")) + + (pass-if "//foo?q#bar" + (uri=? (string->uri-reference "//foo?q#bar") + #:host "foo" + #:path "" + #:query "q" + #:fragment "bar")) + + (pass-if "//foo:/?q#bar" + (uri=? (string->uri-reference "//foo:/?q#bar") + #:host "foo" + #:path "/" + #:query "q" + #:fragment "bar")) + + (pass-if "//foo:100?q#bar" + (uri=? (string->uri-reference "//foo:100?q#bar") + #:host "foo" + #:port 100 + #:path "" + #:query "q" + #:fragment "bar")) + + (pass-if "//foo:100/?q#bar" + (uri=? (string->uri-reference "//foo:100/?q#bar") + #:host "foo" + #:port 100 + #:path "/" + #:query "q" + #:fragment "bar"))) (with-test-prefix "uri->string" (pass-if "ftp:" @@ -225,30 +516,78 @@ (equal? "ftp://foo/bar" (uri->string (string->uri "ftp://foo/bar")))) + (pass-if "//foo/bar" + (equal? "//foo/bar" + (uri->string (string->uri-reference "//foo/bar")))) + (pass-if "ftp://foo@bar:22/baz" (equal? "ftp://foo@bar:22/baz" (uri->string (string->uri "ftp://foo@bar:22/baz")))) + (pass-if "//foo@bar:22/baz" + (equal? "//foo@bar:22/baz" + (uri->string (string->uri-reference "//foo@bar:22/baz")))) + (when (memq 'socket *features*) (pass-if "http://192.0.2.1" (equal? "http://192.0.2.1" (uri->string (string->uri "http://192.0.2.1")))) + (pass-if "//192.0.2.1" + (equal? "//192.0.2.1" + (uri->string (string->uri-reference "//192.0.2.1")))) + (pass-if "http://[2001:db8::1]" (equal? "http://[2001:db8::1]" (uri->string (string->uri "http://[2001:db8::1]")))) + (pass-if "//[2001:db8::1]" + (equal? "//[2001:db8::1]" + (uri->string (string->uri-reference "//[2001:db8::1]")))) + (pass-if "http://[::ffff:192.0.2.1]" (equal? "http://[::ffff:192.0.2.1]" - (uri->string (string->uri "http://[::ffff:192.0.2.1]"))))) + (uri->string (string->uri "http://[::ffff:192.0.2.1]")))) + + (pass-if "//[::ffff:192.0.2.1]" + (equal? "//[::ffff:192.0.2.1]" + (uri->string (string->uri-reference "//[::ffff:192.0.2.1]"))))) (pass-if "http://foo:" (equal? "http://foo" (uri->string (string->uri "http://foo:")))) + (pass-if "//foo" + (equal? "//foo" + (uri->string (string->uri-reference "//foo")))) + (pass-if "http://foo:/" (equal? "http://foo/" - (uri->string (string->uri "http://foo:/"))))) + (uri->string (string->uri "http://foo:/")))) + + (pass-if "//foo:/" + (equal? "//foo/" + (uri->string (string->uri-reference "//foo:/")))) + + (pass-if "/" + (equal? "/" + (uri->string (string->uri-reference "/")))) + + (pass-if "/foo" + (equal? "/foo" + (uri->string (string->uri-reference "/foo")))) + + (pass-if "/foo/" + (equal? "/foo/" + (uri->string (string->uri-reference "/foo/")))) + + (pass-if "/foo/?bar#baz" + (equal? "/foo/?bar#baz" + (uri->string (string->uri-reference "/foo/?bar#baz")))) + + (pass-if "foo/?bar#baz" + (equal? "foo/?bar#baz" + (uri->string (string->uri-reference "foo/?bar#baz"))))) (with-test-prefix "decode" (pass-if "foo%20bar"