unofficial mirror of guile-user@gnu.org 
 help / color / mirror / Atom feed
* regex-split for Guile
@ 2011-03-07 14:57 William James
  2011-03-12  2:08 ` Neil Jerram
  0 siblings, 1 reply; 3+ messages in thread
From: William James @ 2011-03-07 14:57 UTC (permalink / raw)
  To: guile-user

Tested under Guile 1.8.7.


(use-modules (ice-9 regex)    ; list-matches, match:start, match:end
             (srfi srfi-1))   ; drop-while

;; Split STR around every match of REGEXP and return the list of pieces.
;;
;; OPTIONS may contain, in any order:
;;   'keep  -- also include the matched (separator) substrings, interleaved
;;             with the non-matching pieces in their original order.
;;   'trim  -- remove empty strings from the beginning and the end of the
;;             result (interior empty strings are kept).
;; Every other element of OPTIONS is handed unchanged to `list-matches'
;; as additional arguments (e.g. regexp execution flags).
;;
;; The result always starts and ends with a non-matching piece (possibly
;; the empty string) unless 'trim removed it.
(define (regex-split regexp str . options)
  (let* ((keep (and (memq 'keep options) #t))
         (trim (and (memq 'trim options) #t))
         ;; Whatever is left after removing our own symbols goes to
         ;; list-matches.
         (flags (delete 'trim (delete 'keep options)))
         (end (string-length str))
         (substrings
          ;; Single pass over the matches: START is where the current
          ;; non-matching piece begins.  Pieces are accumulated in
          ;; reverse and flipped once at the end, so the whole split is
          ;; linear in the number of matches.
          (let loop ((matches (apply list-matches regexp str flags))
                     (start 0)
                     (acc '()))
            (if (null? matches)
                (reverse! (cons (substring str start end) acc))
                (let* ((m (car matches))
                       (m-start (match:start m))
                       (m-end (match:end m))
                       (acc (cons (substring str start m-start) acc)))
                  (loop (cdr matches)
                        m-end
                        (if keep
                            (cons (substring str m-start m-end) acc)
                            acc)))))))
    (if trim
        ;; SUBSTRINGS is freshly consed, so the destructive reversals
        ;; cannot affect anything the caller holds.
        (reverse! (drop-while string-null?
                    (reverse! (drop-while string-null? substrings))))
        substrings)))



guile> (regex-split "[-x]+" "foo--x--bar---what--")

("foo" "bar" "what" "")

guile> (regex-split "[-x]+" "foo--x--bar---what--"  'trim)

("foo" "bar" "what")

guile> (regex-split "[-x]+" "foo--x--bar---what"  'keep)

("foo" "--x--" "bar" "---" "what")



      



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: regex-split for Guile
  2011-03-07 14:57 regex-split for Guile William James
@ 2011-03-12  2:08 ` Neil Jerram
  0 siblings, 0 replies; 3+ messages in thread
From: Neil Jerram @ 2011-03-12  2:08 UTC (permalink / raw)
  To: William James; +Cc: guile-user

William James <w_a_x_man@yahoo.com> writes:

> (define (regex-split regexp str . options)

Thanks for posting that!  For fun/interest, here's an alternative
implementation that occurred to me.

       Neil


(use-modules (ice-9 regex)
	     (ice-9 string-fun))

;; Split STR around every match of REGEX and return the list of pieces.
;;
;; OPTS may contain the symbols:
;;   'keep  -- return non-matching and matching substrings interleaved, in
;;             their original order.  When 'keep is given, 'trim is
;;             ignored.
;;   'trim  -- (only without 'keep) drop every empty string from the
;;             result, including interior ones.
;; Any other element of OPTS is ignored.
;;
;; The pieces are computed directly from the match positions reported by
;; `list-matches' rather than by inserting a marker character into the
;; string, so STR may contain any character (including #\@) without
;; confusing the split.
(define (regex-split regex str . opts)
  (let ((keep? (memq 'keep opts))
        (trim? (memq 'trim opts)))
    ;; START is the index where the current non-matching piece begins;
    ;; ACC collects the pieces in reverse order.
    (let loop ((matches (list-matches regex str))
               (start 0)
               (acc '()))
      (if (null? matches)
          (let ((pieces (reverse (cons (substring str start
                                                  (string-length str))
                                       acc))))
            (if (and trim? (not keep?))
                (filter (lambda (s)
                          (not (string-null? s)))
                        pieces)
                pieces))
          (let* ((m (car matches))
                 (acc (cons (substring str start (match:start m)) acc)))
            (loop (cdr matches)
                  (match:end m)
                  (if keep?
                      (cons (match:substring m) acc)
                      acc)))))))



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: regex-split for Guile
@ 2011-03-14 14:54 William James
  0 siblings, 0 replies; 3+ messages in thread
From: William James @ 2011-03-14 14:54 UTC (permalink / raw)
  To: guile-user

Neil Jerram wrote:

> Thanks for posting that!  For fun/interest, here's an alternative
> implementation that occurred to me.
> 
>        Neil

Thanks for the feedback.

> 
> 
> (use-modules (ice-9 regex)
>              (ice-9 string-fun))
> 
> (define (regex-split regex str . opts)
>   (let* ((unique-char #\@)
>          (unique-char-string (string unique-char)))
>     (let ((splits (separate-fields-discarding-char
>                    unique-char
>                    (regexp-substitute/global #f
>                                              regex
>                                              str
>                                              'pre
>                                              unique-char-string
>                                              0
>                                              unique-char-string
>                                              'post)
>                    list)))

This is an approach that I used some years ago in Awk.
ASCII code 1 is used as the unique character:

# shatter( s, shards, regexp )
#
# Produces array of nonmatching and matching
# substrings. The size of the array will
# always be an odd number. The first and the
# last item will always be nonmatching.
#
#   s       string to break apart
#   shards  array that receives the pieces (filled by split())
#   regexp  pattern whose matches become the even-numbered pieces
#
# Returns the value of split(), i.e. the number of pieces stored
# in shards.
#
# NOTE(review): the odd-size guarantee presumably assumes that s is
# non-empty, that s does not already contain the "\1" marker character,
# and that regexp never matches the empty string -- confirm before
# relying on it.
function shatter( s, shards, regexp )
{ gsub( regexp, "\1&\1", s  )       # wrap every match in "\1" markers ("&" is the matched text)
  return split( s, shards, "\1" )   # cut at the markers: nonmatch, match, nonmatch, ...
}


>       (cond ((memq 'keep opts)
>              splits)
>             (else
>              (let ((non-matches (map (lambda (i)
>                                        (list-ref splits (* i 2)))
>                                      (iota (floor (/ (1+ (length splits)) 2))))))
>                (if (memq 'trim opts)
>                    (filter (lambda (s)
>                              (not (zero? (string-length s))))
>                            non-matches)
>                    non-matches)))))))

The way that I want 'trim to work is to remove just the
leading and trailing empty strings.  In Ruby, trailing
null strings are removed by default:

",foo,,,bar,".split( "," )
    ==>["", "foo", "", "", "bar"]




      



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-03-14 14:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2011-03-07 14:57 regex-split for Guile William James
2011-03-12  2:08 ` Neil Jerram
  -- strict thread matches above, loose matches on Subject: below --
2011-03-14 14:54 William James

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).