;;; Gather statistics about the entries of the store's ".links" directory
;;; and plot cumulative distributions of their properties.

(use-modules (charting)
             ((guix store) #:select (%store-prefix))
             (ice-9 ftw)
             (ice-9 match)
             (srfi srfi-1)
             (srfi srfi-9))

;; One leaf found under %LINKS-DIRECTORY.
(define-record-type <deduplicated-file>
  (deduplicated-file name size links)
  deduplicated-file?
  (name   deduplicated-file-name)          ;file name reported by 'file-system-fold'
  (size   deduplicated-file-size)          ;'stat:size' of the entry
  (links  deduplicated-file-link-count))   ;'stat:nlink' of the entry

;; Directory traversed by 'links'.
(define %links-directory
  (string-append (%store-prefix) "/.links"))

(define (links)
  "Return a list of <deduplicated-file> records, one for each leaf found
under %LINKS-DIRECTORY.  Entries are examined with 'lstat'.  Call 'error'
when 'file-system-fold' reports a failure."
  (file-system-fold (const #t)                        ;enter? (always descend)
                    (lambda (file stat result)        ;leaf
                      (cons (deduplicated-file file
                                               (stat:size stat)
                                               (stat:nlink stat))
                            result))
                    (lambda (directory stat result)   ;down
                      result)
                    (lambda (directory stat result)   ;up
                      result)
                    (const #f)                        ;skip
                    (lambda (file stat errno result)  ;error
                      (error "i/o error" file errno))
                    '()
                    %links-directory
                    lstat))

;; Binary size units, in bytes.
(define KiB (expt 2 10))
(define MiB (* KiB KiB))
(define GiB (* KiB MiB))

(define (saved-space files)
  "Return the total amount of saved space given FILES, a list of
<deduplicated-file>.  Each file contributes SIZE times its link count minus
two.  Raise an error for a file that has fewer than two links."
  (fold (lambda (df result)
          (match df
            (($ <deduplicated-file> name size links)
             (when (< links 2)
               (error "too few links" name links))
             ;; NOTE(review): presumably one link is the ".links" entry
             ;; itself and one is the first user of the file, so only links
             ;; beyond the second count as savings -- confirm.
             (+ result (* size (- links 2))))))
        0
        files))

(define (cumulative-distribution files property)
  "Return a list of (VALUE . COUNT) pairs representing the number of FILES
whose PROPERTY is VALUE or less.  PROPERTY is a procedure mapping a file to
a number.  Pairs are ordered by decreasing VALUE, so the COUNT of the first
pair is the total number of FILES.  Return the empty list when FILES is
empty."
  ;; NOTE(review): the body of this procedure was lost together with the
  ;; header of 'plot-distribution'; it is rebuilt from the docstring and from
  ;; the way 'plot-distribution' reads the total off the first pair.
  (define (file<? f1 f2)
    (< (property f1) (property f2)))

  (fold (lambda (file result)
          (let ((value (property file)))
            (match result
              (((previous . count) . rest)
               (if (= value previous)
                   ;; Same VALUE as the last file: bump its count in place.
                   (cons (cons previous (+ count 1)) rest)
                   ;; New, larger VALUE: start from the running total.
                   (cons (cons value (+ count 1)) result)))
              (()
               (list (cons value 1))))))
        '()
        (sort files file<?)))

(define* (plot-distribution distribution output
                            #:key
                            (subtitle "value")
                            (x-axis-label "value")
                            (group-name "files")
                            max-x)
  "Plot DISTRIBUTION, a non-empty list of (VALUE . COUNT) pairs as returned
by 'cumulative-distribution', as a scatter plot written to the PNG file
OUTPUT.  Counts are shown as percentages of the COUNT of the first pair.
SUBTITLE is appended to the plot title, X-AXIS-LABEL labels the X axis, and
GROUP-NAME names the data series.  When MAX-X is true, pairs whose VALUE is
not below MAX-X are left out."
  ;; NOTE(review): this header was lost and is rebuilt from the identifiers
  ;; the body uses and from the keyword arguments passed in the examples;
  ;; the default values of the keyword arguments are guesses -- confirm.
  (define (format-log2-tick tick)
    ;; NOTE(review): an expression involving (log2 (inexact->exact tick))
    ;; preceded this one, but its surrounding text was lost; only the plain
    ;; number label is kept.
    (number->string (inexact->exact tick)))

  (define (adjust-items total)
    ;; Return a procedure for 'filter-map' that turns a (VALUE . COUNT) pair
    ;; into (VALUE . PERCENTAGE-OF-TOTAL), or #f to drop the pair.
    (lambda (x)
      (match x
        ;; XXX: Filter out the two cases that would give us a numerical
        ;; overflow.
        ((0 . _) #f)
        ((1 . _) #f)
        ((value . count)
         (and (or (not max-x) (< value max-x))
              (cons value (* 100. (/ count total))))))))

  (match distribution
    (((_ . total) . _)
     (let ((percent (filter-map (adjust-items total) distribution)))
       (make-scatter-plot #:title (string-append "Cumulative distribution by "
                                                 subtitle)
                          #:data `((,group-name ,@percent))
                          #:x-axis-label x-axis-label
                          #:y-axis-label "%"
                          #:tick-label-formatter format-log2-tick
                          #:log-x-base 2
                          #:min-x 1
                          #:max-y 101
                          #:write-to-png output)))))

#!
Examples:

(define l (links))                                ;this is the expensive part

(plot-distribution (cumulative-distribution l deduplicated-file-link-count)
                   "/tmp/nlink.png"
                   #:x-axis-label "number of hard links"
                   #:subtitle "hard link count"
                   #:max-x 2048
                   #:group-name "nlinks")

(plot-distribution (cumulative-distribution
                    (filter (lambda (file)
                              (< (deduplicated-file-size file) 1024))
                            l)
                    deduplicated-file-link-count)
                   "/tmp/nlink-small.png"
                   #:x-axis-label "number of hard links"
                   #:subtitle "hard link count for files < 1KiB"
                   #:max-x 2048
                   #:group-name "nlinks")

(plot-distribution (cumulative-distribution l deduplicated-file-size)
                   "/tmp/size.png"
                   #:x-axis-label "file size"
                   #:subtitle "file size"
                   #:max-x 32768
                   #:group-name "size (B)")

(plot-distribution (cumulative-distribution
                    (filter (lambda (f)
                              (> (deduplicated-file-link-count f) 2))
                            l)
                    deduplicated-file-size)
                   "/tmp/size-deduplicated.png"
                   #:x-axis-label "file size"
                   #:subtitle "size for files actually deduplicated"
                   #:max-x 32768
                   #:group-name "size (B)")

(plot-distribution (cumulative-distribution
                    (filter (lambda (file)
                              (< (deduplicated-file-size file) 1024))
                            l)
                    (lambda (file)
                      (* (deduplicated-file-size file)
                         (- (deduplicated-file-link-count file) 2))))
                   "/tmp/size-savings.png"
                   #:x-axis-label "savings"
                   #:subtitle "savings for files < 1KiB"
                   #:max-x 32768
                   #:group-name "savings (B)")
!#