;;; GNU Guix web site ;;; Copyright © 2019 Florian Pelz ;;; ;;; This file is part of the GNU Guix web site. ;;; ;;; The GNU Guix web site is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU Affero General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; The GNU Guix web site is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU Affero General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public License ;;; along with the GNU Guix web site. If not, see . (use-modules (ice-9 getopt-long) (ice-9 match) (ice-9 peg) (ice-9 receive) (ice-9 regex) (ice-9 textual-ports) (srfi srfi-1) ;lists (srfi srfi-9)) ;records ;;; This script imitates xgettext, but combines nested s-expressions ;;; in the input Scheme files to a single msgstr in the PO file. It ;;; works by first reading the keywords specified on the command-line, ;;; then dealing with the remaining options using (ice-9 getopt-long). ;;; Then, it parses each Scheme file in the POTFILES file specified ;;; with --files-from and constructs po entries from it. For parsing, ;;; a PEG is used instead of Scheme’s read, because we can extract ;;; comments with it. The po entries are written to the PO file ;;; specified with the --output option. Scheme code can then use the ;;; (sexp-xgettext) module to deconstruct the msgids looked up in the ;;; PO file via gettext. (define (pk a) (begin (write a) (newline) a)) (define-record-type (make-keyword-spec id sg pl c total xcomment) keyword-spec? (id keyword-spec-id) ;identifier (sg keyword-spec-sg) ;arg with singular (pl keyword-spec-pl) ;arg with plural (c keyword-spec-c) ;arg with msgctxt or 'mixed if sg is mixed msgctxt|singular (total keyword-spec-total) ;total number of args (xcomment keyword-spec-xcomment)) (define (complex-keyword-spec? keyword-spec) (match keyword-spec (($ _ _ #f #f _ #f) #f) (else #t))) (define %keyword-specs ;; List of valid keywords. (let loop ((opts (cdr (command-line)));command-line options from ;which to extract --keyword ;options (remaining-opts '()) ;unhandled opts (specs '())) ;; Read keywords from command-line options. (define (string->integer str) (if (string-match "[0-9]+" str) (string->number str) (error "Not a decimal integer."))) (define* (argnums->spec id #:optional (argnums '())) (let loop ((sg #f) (pl #f) (c #f) (total #f) (xcomment #f) (argnums argnums)) (match argnums (() (make-keyword-spec id (if sg sg 1) pl c total xcomment)) ((arg . argnums) (cond ((string-suffix? "c" arg) (cond (c (error "c suffix clashes")) (else (let* ((number-str (string-drop-right arg 1)) (number (string->integer number-str))) (loop sg pl number total xcomment argnums))))) ((string-suffix? "g" arg) (cond (sg (error "Only first argnum can have g suffix.")) (c (error "g suffix clashes.")) (else (let* ((number-str (string-drop-right arg 1)) (number (string->integer number-str))) (loop number #f 'mixed total xcomment argnums))))) ((string-suffix? "t" arg) (cond (total (error "t suffix clashes")) (else (let* ((number-str (string-drop-right arg 1)) (number (string->integer number-str))) (loop sg pl c number xcomment argnums))))) ((string-suffix? "\"" arg) (cond (xcomment (error "xcomment clashes")) (else (let* ((comment (substring arg 1 (- (string-length arg) 1)))) (loop sg pl c total comment argnums))))) (else (let* ((number (string->integer arg))) (if sg (if pl (error "Too many argnums.") (loop sg number c total xcomment argnums)) (loop number #f c total xcomment argnums))))))))) (define (string->spec str) ;see `info xgettext` (match (string-split str #\:) ((id) (argnums->spec id)) ((id argnums) (argnums->spec id (string-split argnums #\,))))) (match opts (() (begin ;; remove recognized --keyword command-line options: (set-program-arguments (cons (car (command-line)) (reverse remaining-opts))) specs)) ((current-opt . rest) (cond ((string=? "--" current-opt) specs) ((string-prefix? "--keyword=" current-opt) (let ((keyword (string-drop current-opt (string-length "--keyword=")))) (loop rest remaining-opts (cons (string->spec keyword) specs)))) ((or (string=? "--keyword" current-opt) (string=? "-k" current-opt)) (let ((next-opt (car rest))) (loop (cdr rest) remaining-opts (cons (string->spec next-opt) specs)))) (else (loop rest (cons current-opt remaining-opts) specs))))))) ;;; Other options are not repeated, so we can use getopt-long: (define %options ;; Corresponds to what is documented at `info xgettext`. (let ((option-spec `((files (single-char #\f) (value #t)) (directory (single-char #\D) (value #t)) (default-domain (single-char #\d) (value #t)) (output (single-char #\o) (value #t)) (output-dir (single-char #\p) (value #t)) (from-code (value #t)) (join-existing (single-char #\j) (value #f)) (exclude-file (single-char #\x) (value #t)) (add-comments (single-char #\c) (value #t)) ;; Because getopt-long does not support repeated options, ;; we took care of --keyword options further up. ;; (keyword (single-char #\k) (value #t)) (flag (value #t)) (force-po (value #f)) (indent (single-char #\i) (value #f)) (no-location (value #f)) (add-location (single-char #\n) (value #t)) (width (single-char #\w) (value #t)) (no-wrap (value #f)) (sort-output (single-char #\s) (value #f)) (sort-by-file (single-char #\F) (value #f)) (omit-header (value #f)) (copyright-holder (value #t)) (foreign-user (value #f)) (package-name (value #t)) (package-version (value #t)) (msgid-bugs-address (value #t)) (msgstr-prefix (single-char #\m) (value #t)) (msgstr-suffix (single-char #\m) (value #t)) (help (value #f)) (pack (value #f))))) (getopt-long (command-line) option-spec))) ;; implemented similar to guix/build/po.scm (define parse-scheme-file ;; This procedure parses FILE and returns a parse tree. (let () ;;TODO: OPTIONALLY IGNORE CASE: (define-peg-pattern comment all (and ";" (* (and peg-any (not-followed-by "\n"))) (and peg-any (followed-by "\n")))) (define-peg-pattern whitespace none (or " " "\t" "\n")) (define-peg-pattern quotation body (or "'" "`" "," ",@")) ;TODO ALLOW USER TO SPECIFY OTHER QUOTE CHARACTERS (define-peg-pattern open body (and (? quotation) (or "(" "[" "{"))) (define-peg-pattern close body (or ")" "]" "}")) (define-peg-pattern string body (and (followed-by "\"") (* (or "\\\"" (and peg-any (not-followed-by "\"")))) (and peg-any (followed-by "\"")) "\"")) (define-peg-pattern token all (or string (and (not-followed-by open) (not-followed-by close) (not-followed-by comment) (* (and peg-any (not-followed-by open) (not-followed-by close) (not-followed-by comment) (not-followed-by string) (not-followed-by whitespace))) (or (and peg-any (followed-by open)) (and peg-any (followed-by close)) (and peg-any (followed-by comment)) (and peg-any (followed-by string)) (and peg-any (followed-by whitespace)) (not-followed-by peg-any))))) (define-peg-pattern sexp all (or (and (? quotation) "(" program ")") (and (? quotation) "[" program "]") (and (? quotation) "{" program "}"))) (define-peg-pattern t-or-s body (or token sexp)) (define-peg-pattern program all (* (or whitespace comment t-or-s))) (lambda (file) (call-with-input-file file (lambda (port) ;; it would be nice to match port directly without ;; converting to a string first (let ((string (get-string-all port))) (peg:tree (match-pattern program string)))))))) (define-record-type (make-po-entry ecomments ref flags ctxt id idpl) po-entry? ;;; irrelevant: (tcomments po-entry-tcomments) ;translator-comments (ecomments po-entry-ecomments) ;extracted-comments (ref po-entry-ref) ;reference (flags po-entry-flags) ;;; irrelevant: (prevctxt po-entry-prevctxt) ;previous-ctxt ;;; irrelevant: (prev po-entry-prev) ;previous-translation (ctxt po-entry-ctxt) ;msgctxt (id po-entry-id) ;msgid (idpl po-entry-idpl) ;msgid-plural ;;; irrelevant: (str po-entry-str) ;msgstr string or association list ;;; ;integer to string ) (define (write-po-entry po-entry) (define* (write-component c prefix #:optional (out display)) (when c (begin (display prefix) (display " ") (out c) (newline)))) (match po-entry (($ ecomments ref flags ctxt id idpl) (write-component ecomments "#.") (write-component ref "#:") (write-component flags "#,") (write-component ctxt "msgctxt" write) (write-component id "msgid" write) (write-component idpl "msgid_plural" write) (display "msgstr \"\"") (newline)))) (define %ecomments-string (make-parameter #f)) (define (update-ecomments-string! str) "Sets the value of the parameter object %ecomments-string if str is an ecomments string. An ecomments string is extracted from a comment because it starts with TRANSLATORS or a key specified with --add-comments." ;TODO NOT IMPLEMENTED YET (when (string-prefix? "TRANSLATORS" str) (%ecomments-string str))) ;TODO NOT THE WHOLE STRING (define %line-number (make-parameter #f)) (define (update-line-number! number) "Sets the value of the parameter object %line-number to NUMBER." (%line-number number)) (define (incr-line-number!) "Increments the value of the parameter object %line-number by 1." (%line-number (1+ %line-number))) (define (make-simple-po-entry msgid) (make-po-entry (%ecomments-string) (%line-number) #f ;TODO use scheme-format for format strings? #f ;no ctxt msgid #f)) (define (matching-keyword id) "Returns the keyword-spec whose identifier is the same as ID, or #f if ID is no string or no such keyword-spec exists." (and (symbol? id) (let ((found (member (symbol->string id) %keyword-specs (lambda (id spec) (string=? id (keyword-spec-id spec)))))) (and found (car found))))) (define (nth-exp program n) "Returns the nth 'token or 'sexp inside the PROGRAM parse tree or #f if no tokens or sexps exist." (let loop ((i 0) (rest program)) (define (on-hit exp) (if (= i n) exp ;; else: (loop (1+ i) (cdr rest)))) (match rest (() #f) ((('token exp) . _) (on-hit (car rest))) ((('sexp open-paren exp close-paren) . _) (on-hit (car rest))) ((_ . _) (loop i (cdr rest))) (else #f)))) (define (more-than-one-exp? program) "Returns true if PROGRAM consiste of more than one expression." (if (matching-keyword (token->string-or-symbol (nth-exp program 0))) (nth-exp program 2) ;if there is third element, keyword does not count (nth-exp program 1))) (define (token->string-or-symbol tok) "For a parse tree TOK, if it is a 'token parse tree, returns its value as a string or symbol, otherwise returns #f." (match tok (('token exp) (with-input-from-string exp (lambda () (read)))) (else #f))) (define (complex-marked-sexp->po-entries parse-tree) "Checks if PARSE-TREE is marked by a keyword. If yes, for a complex keyword spec, returns a list of po-entries for it. For a simple keyword spec, returns the argument number of its singular form. Otherwise returns #f." (let* ((first (nth-exp parse-tree 0)) (spec (matching-keyword (token->string-or-symbol first)))) (if spec (if ;if the identifier of a complex keyword occurs first (complex-keyword-spec? spec) ;; then make po entries for it (match spec (($ id sg pl c total xcomment) (if (eq? c 'mixed) ; if msgctxt and singular msgid are in one string (let* ((exp (nth-exp parse-tree sg)) (val (token->string-or-symbol exp)) (idx (if (string? val) (string-rindex val #\|)))) (list (make-po-entry (%ecomments-string) (%line-number) #f ;TODO use scheme-format for format strings? (string-take val idx) (string-drop val (1+ idx)) #f))) ;plural forms are not supported ;; else construct msgids (receive (pl-id pl-entries) (match pl (#t (construct-msgid-and-po-entries (nth-exp parse-tree pl))) (#f (values #f '()))) (receive (sg-id sg-entries) (construct-msgid-and-po-entries (nth-exp parse-tree sg)) (cons (make-po-entry (%ecomments-string) (%line-number) #f ;TODO use scheme-format for format strings? (and c (token->string-or-symbol (nth-exp parse-tree c))) sg-id pl-id) (append sg-entries pl-entries))))))) ;; else if it is a simple keyword, return the argnum: (keyword-spec-sg spec)) ;; if no keyword occurs, then false #f))) (define (construct-po-entries parse-tree) "Converts a PARSE-TREE resulting from a call to parse-scheme-file to a list of po-entry records. Unlike construct-msgid-and-po-entries, strings are not collected to a msgid. The list of po-entry records is the return value." (let ((entries (complex-marked-sexp->po-entries parse-tree))) (cond ((list? entries) entries) ((number? entries) ;parse-tree yields a single, simple po entry (receive (id entries) (construct-msgid-and-po-entries (nth-exp parse-tree entries)) (cons (make-simple-po-entry id) entries))) (else ;search for marked translations in parse-tree (match parse-tree (() '()) (('comment str) (begin (update-ecomments-string! str) '())) ;; TODO UPDATE %line-number ON NL (('token str) '()) (('sexp open-paren program close-paren) (construct-po-entries program)) (('program . components) (append-map construct-po-entries components))))))) (define* (tag counter prefix #:key (flavor 'start)) "Formats the number COUNTER as a tag according to FLAVOR, which is either 'start, 'end or 'empty for a start, end or empty tag, respectively." (string-append "<" (if (eq? flavor 'end) "/" "") prefix (number->string counter) (if (eq? flavor 'empty) "/" "") ">")) (define-record-type (make-construct-fold-state msgid-string counter po-entries) construct-fold-state? (msgid-string construct-fold-state-msgid-string) (counter construct-fold-state-counter) (po-entries construct-fold-state-po-entries)) (define* (construct-msgid-and-po-entries parse-tree #:optional (prefix "")) "Like construct-po-entries, but with two return values. The first is an accumulated msgid constructed from all components in PARSE-TREE for use in make-po-entry. Non-strings are replaced by tags containing PREFIX. The second return value is a list of po entries for subexpressions marked with a complex keyword spec." (match parse-tree (() (values "" '())) (('comment str) (begin (update-ecomments-string! str) (values "" '()))) ;; TODO UPDATE %line-number ON NL (('token exp) (let ((maybe-string (token->string-or-symbol parse-tree))) (if (string? maybe-string) (values maybe-string '()) (error "Single symbol marked for translation." maybe-string)))) (('sexp open-paren program close-paren) ;; parse program instead (construct-msgid-and-po-entries program prefix)) (('program . components) ;; Concatenate strings in parse-tree to a new msgid and add an ;; tag for each sexp in between. (match (fold (lambda (component prev-state) (match prev-state (($ msgid-string counter po-entries) (match component (('comment str) (begin (update-ecomments-string! str) prev-state)) ;; TODO INCREASE %line-number ON NL (('token exp) (let ((maybe-string (token->string-or-symbol component))) (cond ((string? maybe-string) ;; if string, append maybe-string to previous msgid (make-construct-fold-state (string-append msgid-string maybe-string) counter po-entries)) ((and (more-than-one-exp? components) ;not the only symbol (or (string-null? msgid-string) ;no string so far (string-suffix? ">" msgid-string))) ;tag before prev-state) ;then ignore ((matching-keyword maybe-string) prev-state) ;ignore keyword token) (else ;append tag representing the token (make-construct-fold-state (string-append msgid-string (tag counter prefix #:flavor 'empty)) (1+ counter) po-entries))))) (('sexp open-paren program close-paren) (let ((first (nth-exp program 0))) (match (complex-marked-sexp->po-entries program) ((? list? result) (make-construct-fold-state (string-append msgid-string (tag counter prefix #:flavor 'empty)) (1+ counter) (append result po-entries))) (result (if (or (number? result) (not (more-than-one-exp? components))) (receive (id entries) (construct-msgid-and-po-entries program (string-append prefix (number->string counter) ".")) (make-construct-fold-state (string-append msgid-string (tag counter prefix #:flavor 'start) id (tag counter prefix #:flavor 'end)) (1+ counter) (append entries po-entries))) ;; else ignore unmarked sexp prev-state))))))))) (make-construct-fold-state "" 1 '()) components) (($ msgid-string counter po-entries) (values msgid-string po-entries)))))) (define scheme-file->po-entries (compose construct-po-entries parse-scheme-file)) (define %files-from-port (let ((files-from (option-ref %options 'files #f))) (if files-from (open-input-file files-from) (current-input-port)))) (define %scheme-files (let loop ((line (get-line %files-from-port)) (scheme-files '())) (if (eof-object? line) (begin (close-port %files-from-port) scheme-files) ;; else read file names before comment (let ((before-comment (car (string-split line #\#)))) (loop (get-line %files-from-port) (append (map match:substring (list-matches "[^ \t]+" line)) scheme-files)))))) (define %output-po-entries (fold (lambda (scheme-file po-entries) (append (scheme-file->po-entries scheme-file) po-entries)) '() %scheme-files)) (define %output-port (let ((output (option-ref %options 'output #f))) (if output (open-output-file output) (current-output-port)))) (with-output-to-port %output-port (lambda () (for-each (lambda (po-entry) (begin (newline) (write-po-entry po-entry))) %output-po-entries)))