unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
From: Kevin Ryde <user42@zip.com.au>
Subject: srfi-1 delete-duplicates
Date: Sun, 22 Jun 2003 10:23:01 +1000	[thread overview]
Message-ID: <87u1ajklju.fsf@zip.com.au> (raw)

This is new delete-duplicates and delete-duplicates!, avoiding the
non-tail-recursions of the current implementations.

Code and test cases below for contemplation.  The loops are a bit
hairy, but seem to run ok.  I'm intending to give them a bit more of a
think before actually checking them in.






SCM_DEFINE (scm_srfi1_delete_duplicates, "delete-duplicates", 1, 1, 0,
	    (SCM lst, SCM pred),
	    "Return a list containing the elements of @var{lst} but without\n"
	    "duplicates.\n"
	    "\n"
	    "When elements are equal, only the first in @var{lst} is\n"
	    "retained.  Equal elements can be anywhere in @var{lst}, they\n"
	    "don't have to be adjacent.  The returned list will have the\n"
	    "retained elements in the same order as they were in @var{lst}.\n"
	    "\n"
	    "Equality is determined by @var{pred}, or @code{equal?} if not\n"
	    "given.  Calls @code{(pred x y)} are made with element @var{x}\n"
	    "being before @var{y} in @var{lst}.  A call is made at most once\n"
	    "for each combination, but the sequence of the calls across the\n"
	    "elements is unspecified.\n"
	    "\n"
	    "@var{lst} is not modified, but the return might share a common\n"
	    "tail with @var{lst}.\n"
	    "\n"
	    "In the worst case, this is an @math{O(N^2)} algorithm because\n"
	    "it must check each element against all those preceding it.  For\n"
	    "long lists it is more efficient to sort and then compare only\n"
	    "adjacent elements.")
#define FUNC_NAME s_scm_srfi1_delete_duplicates
{
  scm_t_trampoline_2 equal_p;
  SCM  ret, *p, keeplst, item, l;

  /* skip to end if an empty list (or something invalid) */
  ret = lst;
  if (SCM_CONSP (lst))
    {
      if (SCM_UNBNDP (pred))
        equal_p = equal_trampoline;
      else
        {
          equal_p = scm_trampoline_2 (pred);
          SCM_ASSERT (equal_p, pred, SCM_ARG2, FUNC_NAME);
        }

      /* ret is the new list constructed.  p is where to append, initially
         &ret then SCM_CDRLOC of the last pair.  lst is advanced as each
         element is considered.

         Elements retained are not immediately appended to ret, instead
         keeplst is the last pair in lst which is to be kept but is not yet
         copied.  Initially this is the first pair of lst, since the first
         is always retained.  *p is kept set to keeplst, so ret (inclusive)
         to lst (exclusive) is all the elements retained, making the
         equality search easy.

         If an item must be deleted, elements from keeplst (inclusive) to
         lst (exclusive) must be copied and appended to ret.  When there's
         no more deletions, *p is left set to keeplst, so ret shares
         structure with the original lst.  (ret will be the entire original
         lst if there's no deletions.)  */

      keeplst = lst;
      p = &ret;

      /* loop over lst elements starting from second */
      for (;;)
        {
          lst = SCM_CDR (lst);
          if (! SCM_CONSP (lst))
            break;

          item = SCM_CAR (lst);

          /* loop searching ret upto lst */
          for (l = ret; l != lst; l = SCM_CDR (l))
            {
              if (SCM_NFALSEP (equal_p (pred, SCM_CAR (l), item)))
                {
                  /* duplicate, don't want this element, so copy keeplst
                     (inclusive) to lst (exclusive) onto ret */
                  while (keeplst != lst)
                    {
                      SCM c = scm_cons (SCM_CAR (keeplst), SCM_EOL);
                      *p = c;
                      p = SCM_CDRLOC (c);
                      keeplst = SCM_CDR (keeplst);
                    }

                  keeplst = SCM_CDR (keeplst);
                  *p = keeplst;
                  break;
                }
            }
        }
    }

  /* demand that lst was a proper list */
  SCM_ASSERT_TYPE (SCM_NULL_OR_NIL_P (lst), lst, SCM_ARG1, FUNC_NAME,
                   "list");
  return ret;
}
#undef FUNC_NAME


SCM_DEFINE (scm_srfi1_delete_duplicates_x, "delete-duplicates!", 1, 1, 0,
	    (SCM lst, SCM pred),
	    "Return a list containing the elements of @var{lst} but without\n"
	    "duplicates.\n"
	    "\n"
	    "When elements are equal, only the first in @var{lst} is\n"
	    "retained.  Equal elements can be anywhere in @var{lst}, they\n"
	    "don't have to be adjacent.  The returned list will have the\n"
	    "retained elements in the same order as they were in @var{lst}.\n"
	    "\n"
	    "Equality is determined by @var{pred}, or @code{equal?} if not\n"
	    "given.  Calls @code{(pred x y)} are made with element @var{x}\n"
	    "being before @var{y} in @var{lst}.  A call is made at most once\n"
	    "for each combination, but the sequence of the calls across the\n"
	    "elements is unspecified.\n"
	    "\n"
	    "@var{lst} may be modified to construct the returned list.\n"
	    "\n"
	    "In the worst case, this is an @math{O(N^2)} algorithm because\n"
	    "it must check each element against all those preceding it.  For\n"
	    "long lists it is more efficient to sort and then compare only\n"
	    "adjacent elements.")
#define FUNC_NAME s_scm_srfi1_delete_duplicates_x
{
  scm_t_trampoline_2 equal_p;
  SCM  ret, endret, item, l;

  /* skip to end if an empty list (or something invalid) */
  ret = lst;
  if (SCM_CONSP (lst))
    {
      if (SCM_UNBNDP (pred))
        equal_p = equal_trampoline;
      else
        {
          equal_p = scm_trampoline_2 (pred);
          SCM_ASSERT (equal_p, pred, SCM_ARG2, FUNC_NAME);
        }

      /* ret is the return list, constructed from the pairs of lst.  endret
         is the last pair of ret, initially the first.  lst is advanced as
         elements are considered.  */

      endret = ret;
      for (;;)
        {
          lst = SCM_CDR (lst);
          if (! SCM_CONSP (lst))
            break;

          /* is item equal to any element from ret to endret (inclusive)? */
          item = SCM_CAR (lst);
          l = ret;
          for (;;)
            {
              if (SCM_NFALSEP (equal_p (pred, SCM_CAR (l), item)))
                break;  /* equal, forget this element */

              if (l == endret)
                {
                  /* not equal to any, so append this pair */
                  * SCM_CDRLOC (endret) = lst;
                  endret = lst;
                  break;
                }
              l = SCM_CDR (l);
            }
        }

      /* terminate, in case last element was deleted */
      * SCM_CDRLOC (endret) = SCM_EOL;
    }

  /* demand that lst was a proper list */
  SCM_ASSERT_TYPE (SCM_NULL_OR_NIL_P (lst), lst, SCM_ARG1, FUNC_NAME, "list");

  return ret;
}
#undef FUNC_NAME






(define (ref-delete-duplicates lst . proc)
  "Reference version of srfi-1 `delete-duplicates'."
  (set! proc (if (null? proc) equal? (car proc)))
  (if (null? lst)
      '()
      (do ((keep '()))
	  ((null? lst)
	   (reverse! keep))
	(set! keep (cons (car lst) keep))
	(set! lst  (ref-delete (car lst) lst proc)))))


;;
;; delete-duplicates and delete-duplicates!
;;

(let ()	
  ;; Call (PROC lst) for all lists of length n <= 4, with all combinations
  ;; of numbers 1 to n in the elements
  (define (test-lists proc)
    (do ((n 1 (1+ n)))
	((> n 4))
      (do ((limit (integer-expt n n))
	   (i 0 (1+ i)))
	  ((>= i limit))
	(let ((lst '()))
	  (do ((j 0 (1+ j))
	       (rem i (quotient rem n)))
	      ((>= j n))
	    (set! lst (cons (remainder rem n) lst)))
	  (proc lst)))))

  (define (common-tests delete-duplicates-proc)
    (pass-if-exception "too few args" exception:wrong-num-args
      (delete-duplicates-proc))
    
    (pass-if-exception "too many args" exception:wrong-num-args
      (delete-duplicates-proc '() equal? 99))
    
    (pass-if "empty"
      (eq? '() (delete-duplicates-proc '())))
    
    (pass-if "equal? (default)"
      (equal? '((2))
	      (delete-duplicates-proc '((2) (2) (2)))))
    
    (pass-if "eq?"
      (equal? '((2) (2) (2))
	      (delete-duplicates-proc '((2) (2) (2)) eq?)))

    (pass-if "called arg order"
      (let ((ok #t))
	(delete-duplicates-proc '(1 2 3 4 5)
				(lambda (x y)
				  (if (> x y)
				      (set! ok #f))
				  #f))
	ok)))
  
  (with-test-prefix "delete-duplicates"
    (common-tests delete-duplicates)
    
    (test-lists
     (lambda (lst)
       (let ((lst-copy (list-copy lst)))
	 (with-test-prefix lst-copy
	   (pass-if "result"
	     (equal? (delete-duplicates     lst)
		     (ref-delete-duplicates lst)))
	   (pass-if "non-destructive"
	     (equal? lst-copy lst)))))))  
  
  (with-test-prefix "delete-duplicates!"
    (common-tests delete-duplicates!)
    
    (test-lists
     (lambda (lst)
       (pass-if lst
	 (equal? (delete-duplicates!    lst)
		 (ref-delete-duplicates lst)))))))


_______________________________________________
Guile-devel mailing list
Guile-devel@gnu.org
http://mail.gnu.org/mailman/listinfo/guile-devel


             reply	other threads:[~2003-06-22  0:23 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-06-22  0:23 Kevin Ryde [this message]
2003-07-27 15:11 ` srfi-1 delete-duplicates Marius Vollmer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87u1ajklju.fsf@zip.com.au \
    --to=user42@zip.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).