unofficial mirror of guile-user@gnu.org 
 help / color / mirror / Atom feed
* biomail2ris 1.0
@ 2002-12-04  5:23 Thien-Thi Nguyen
  0 siblings, 0 replies; only message in thread
From: Thien-Thi Nguyen @ 2002-12-04  5:23 UTC (permalink / raw)
  Cc: guile-user, markus

writing this exposes a limitation of module (ttn edit): there is no way
to specify the equivalent of emacs' case-fold-search.  that's why there
are ugly caps in the `find' regexp, ready to fail when/if the biomail
folks change their html generation scheme.  (for a more general solution
to parsing html, see LAML.)

biomail2ris has been tested with contemporary biomail option "html in
attachment" and a slightly hacked refdb-0.9 installation.

viva science and the scientific method!

thi



_____________________________________
#!/bin/sh
# aside from this initial boilerplate, this is actually -*- scheme -*- code
main='(module-ref (resolve-module '\''(biomail2ris)) '\'main')'
exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
!#
;;; ID: $Id: biomail2ris.scm,v 1.1 2002/12/04 04:58:41 ttn Exp $
;;;
;;; Copyright (C) 2002 Thien-Thi Nguyen
;;; This program is part of ttn-do, released under GNU GPL v2 with ABSOLUTELY
;;; NO WARRANTY.  See http://www.gnu.org/copyleft/gpl.txt for details.

;;; Commentary:

;; Usage: biomail2ris BIOMAIL-HTML-MAIL
;;
;; Parse file BIOMAIL-HTML-MAIL, retrieve in Medline format all the citations
;; therein, filter them through nmed2ris and display the output to stdout.

;;; Code:

(define-module (biomail2ris)
  :use-module ((scripts PROGRAM) :select (script-MAIN))
  :use-module (www http)
  :use-module (www url)
  :use-module (ttn gap-buffer)
  :use-module (ttn edit)
  :use-module (ttn make-buffered-caller)
  :export (biomail2ris))

(define *biomail2ris-version* "1.0")

(define (biomail-get linear-url fields)
  (http:post-form (url:parse linear-url)
                  (list (format #f "User-Agent: biomail2ris/~A"
                                *biomail2ris-version*))
                  (cons '(submit . "Medline format") fields)))

(use-modules (scripts slurp))           ; testing

(define (biomail2ris . args)
  (let* ((sel '()) (user #f) (mail #f) (subj #f) (lang #f)
         (buf (make-gap-buffer (slurp (car args))))
         (find (lambda (buf var)
                 (let ((re (format #f "<INPUT .+NAME=~S.+VALUE=\"(.*)\""
                                   var)))
                   (editing-buffer buf
                     (goto-char (point-min))
                     (re-search-forward re)
                     (match-string 1))))))
    (editing-buffer buf
      (goto-char (point-min))
      (while (re-search-forward "<input.+checkbox.+value=\"([0-9]+)\""
                                (point-max) #t)
        (set! sel (acons 'uid (match-string 1) sel))))
    (set! user (find buf "user"))
    (set! mail (find buf "mail"))
    (set! subj (find buf "subj"))
    (set! lang (find buf "lang"))
    (let* ((ans (biomail-get "http://www.biomail.org"
                             (append sel `((user . ,user)
                                           (mail . ,mail)
                                           (subj . ,subj)
                                           (lang . ,lang)))))
           (msg (string-append (http:message-body ans) "\n\n"))
           (last-point #f))
      ;; clean it up
      (set! buf (make-gap-buffer msg))
      (editing-buffer buf
        (goto-char (point-min))
        (set! last-point (point))
        (while (re-search-forward "^Paper#.*\n--*\n" (point-max) #t)
          (delete-region last-point (point))
          (search-forward "\n\n")
          (set! last-point (point))))
      (set! msg (gb->string buf))
      ;; lastly, convert to ris format
      (let ((nmed2ris (make-buffered-caller "nmed2ris" #:inb buf)))
        (nmed2ris 'execute)
        (display (nmed2ris 'outbuf-string)))))
  #t)

(define (main . args)
  (script-MAIN args
               "biomail2ris" biomail2ris
               '(usage . commentary)))

;;; biomail2ris ends here


_______________________________________________
Guile-user mailing list
Guile-user@gnu.org
http://mail.gnu.org/mailman/listinfo/guile-user


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2002-12-04  5:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-12-04  5:23 biomail2ris 1.0 Thien-Thi Nguyen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).