unofficial mirror of help-gnu-emacs@gnu.org
 help / color / mirror / Atom feed
From: Psionic K <psionik@positron.solutions>
To: help-gnu-emacs@gnu.org
Cc: Tomas Hlavaty <tom@logand.com>
Subject: Re: Identifying sources of allocations in a toy Mandelbrot package
Date: Sat, 27 Jan 2024 18:25:32 +0900	[thread overview]
Message-ID: <CADQMGAQJJuAutkpxuSQK-sjruqpw==nOpWU5amKAhjsi0egRHA@mail.gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 2015 bytes --]

I wanted to dig into the CPU versus memory limitation a bit.

I switched back to writing to a vector, not touching any buffers to
wrap the profile around the hot section.  CPU + mem results:

fixed point byte-compiled:       195 samples     3,890,391 bytes
fixed point native compiled:     250 samples     4,205,335 bytes
floating point byte-compiled:    560 samples   221,858,239 bytes
floating point native compiled:  409 samples   221,732,687 bytes

* There is no typo in the native & byte compiled fixed point results.
I made a few runs.  It is slower when native compiled.
* I found no other combination of `defsubst', `defun', or declaring
speed or not that could get the native compiled fixed point version to
be faster than byte compiled.
* The floating point version was only measured when the run did not
trigger GC.  This inflates its performance because GC cost, about 30%,
is not shown.

While the fixed point is fast considering it has to do more work,
perhaps if the native compiler didn't manage to slow it down, it could
be faster.

Regarding my previous statements about memory bandwidth being the
limiting factor: the memory used is within an order of magnitude of
the random write bandwidth available, at the lower end.  If that
amount was limiting, the compilation method would not be expected to
affect performance much.  Since the native compiled floating point is
faster, it is likely CPU bound.  Consumption is just a proxy for
bandwidth used.  Runtimes use bandwidth without new allocation.
Better tools are needed to answer this conclusively.

I may continue this exercise.  My initial idea for creating clarity is
to write a function that just throws away conses holding values that
require 1, 10, 100, or 1000 steps to compute.  If the performance
scales inversely with the number of conses but not with the number of
steps, we can isolate memory versus computation as the bottleneck for
some workflows and provide a
benchmark for the compiler and GC to be smarter about not generating
garbage in the first place.

[-- Attachment #2: fixed.el --]
[-- Type: text/x-emacs-lisp, Size: 2093 bytes --]

;;; -*- lexical-binding: t -*-

;; Samples that reach the iteration cap (256) index the last table slot,
;; which is left at 0, so they wrap in the graphic to display as zero.
;; The 0.8 exponent enhances contrast a bit.
(defconst +contrast-table+
  (let ((table (make-vector 257 0)))
    (dotimes (v 256)
      (aset table v (floor (* 256 (expt (/ v 256.0) 0.8)))))
    table)
  "Lookup table from an iteration count (0..256) to a display value.")

;; The body must build code with `inline-quote': the inliner receives the
;; argument *form*, not its value.  The table is a named constant rather
;; than a closure variable so the inlined expansion can reference it.
(define-inline contrast (v)
  "Return the contrast-adjusted display value for iteration count V.
V must be an integer from 0 to 256 inclusive."
  (inline-quote (aref +contrast-table+ ,v)))

(defconst +1fx+ (ash 1 12)
  "Fixed-point representation of 1; values carry 12 fractional bits.")

;; `define-inline' bodies must return code: the inliner is called with the
;; argument form, so the arithmetic has to be wrapped in `inline-quote'.
(define-inline fx (x)
  "Convert integer X to fixed point by scaling it by `+1fx+'."
  (inline-quote (* ,x +1fx+)))

;; `define-inline' bodies must return code: the inliner is called with the
;; argument forms, so the arithmetic has to be wrapped in `inline-quote'.
(define-inline fx2 (n d)
  "Return the fixed-point value of the fraction N/D.
N is scaled by `+1fx+' before the integer division by D, so the
fractional part of the quotient is kept to fixed-point precision."
  (inline-quote (/ (* ,n +1fx+) ,d)))

(defconst +4fx+ (* 4 +1fx+)
  "Fixed-point representation of 4.")

;; `define-inline' bodies must return code: the inliner is called with the
;; argument forms, so the arithmetic has to be wrapped in `inline-quote'.
(define-inline fx* (l r)
  "Return the fixed-point product of fixed-point numbers L and R.
The raw integer product carries the scale factor twice, so it is
divided by `+1fx+' once to bring it back to fixed-point scale."
  (inline-quote (/ (* ,l ,r) +1fx+)))

;; X appears twice in the expansion, so `inline-letevals' is used to make
;; sure a non-trivial argument form is evaluated only once.
(define-inline fx^2 (x)
  "Return the fixed-point square of fixed-point number X."
  (inline-letevals (x)
    (inline-quote (fx* ,x ,x))))

;; Defined with `defsubst' rather than `define-inline': a `define-inline'
;; body has to return code via `inline-quote', and a quoted expansion of
;; this loop could capture caller variables named like its locals.
(defsubst mandelbrot1 (cx cy)
  "Return the iteration count for the fixed-point point CX, CY.
Starting from z = 0, repeat z <- z^2 + c while the squared magnitude
of z is at most `+4fx+' and fewer than 256 iterations have run.  The
result is the number of iterations performed, from 0 to 256."
  (let ((zr 0)
        (zi 0)
        (v 0)
        zr^2
        zi^2)
    ;; The squares are computed once in the loop test and reused in the body.
    (while (and (< v 256)
                (<= (+ (setq zr^2 (fx^2 zr))
                       (setq zi^2 (fx^2 zi)))
                    +4fx+))
      (setq v (1+ v))
      ;; The new real part is held in TMP because the update of ZI below
      ;; still needs the old ZR.
      (let ((tmp (+ zr^2 (- zi^2) cx)))
        (setq zi (+ (* (fx* zr zi) 2) cy))
        (setq zr tmp)))
    v))

(defun mandelbrot (&optional w h)
  "Compute a W by H Mandelbrot image with fixed-point arithmetic.
W defaults to 1600 and H to 1200.  The sampled region starts at
x = -2.5, y = -1.5 and spans about 4 units in x and 3 units in y; the
per-pixel steps are truncated to fixed-point precision.  Return a
vector of W*H contrast-adjusted values, stored row by row."
  (declare (speed 3))
  (interactive)
  ;; Resolve the optional arguments first: the output vector is sized from
  ;; them, so they must not still be nil when `make-vector' runs.
  (let* ((w (or w 1600))
         (h (or h 1200))
         (output (make-vector (* w h) 0.0))
         (idx 0)
         (x0 (fx2 -25 10))
         (y0 (fx2 -15 10))
         (dxp (/ (fx 4) w))
         (dyp (/ (fx 3) h)))
    (dotimes (y h)
      (let ((cy (+ y0 (* dyp y))))
        (dotimes (x w)
          (let ((v (contrast (mandelbrot1 (+ x0 (* dxp x)) cy))))
            (aset output idx v)
            (setq idx (1+ idx))))))
    output))

(defun mandelbrot-bench ()
  "Profile CPU and memory use of a 320x200 `mandelbrot' run.
Show the profiler report afterwards and print the computed vector
with `message'."
  (interactive)
  (let (output)
    (profiler-start 'cpu+mem)
    ;; Stop the profiler even if `mandelbrot' signals, so a failed run
    ;; does not leave it running.
    (unwind-protect
        (setq output (mandelbrot 320 200))
      (profiler-stop))
    (profiler-report)
    (message "%S" output)))

;;(mandelbrot 320 200)
;;(mandelbrot 640 480)
;;(mandelbrot 1024 768)
;;(mandelbrot)

;; (profiler-start 'mem)
;; (mandelbrot 320 200)
;; (profiler-stop)
;; (profiler-report)

;;(profiler-start 'cpu)
;;(mandelbrot 320 200)
;;(profiler-stop)
;;(profiler-report)

;;(profiler-start 'cpu+mem)
;;(mandelbrot)
;;(profiler-stop)
;;(profiler-report)

;; (benchmark-run 1 (mandelbrot 320 200))










































































































;;(32.898215598 2 0.09366582000001245)
;;(51.629763289 2 0.09165389300000015)

[-- Attachment #3: floating.el --]
[-- Type: text/x-emacs-lisp, Size: 1730 bytes --]

;;; -*- lexical-binding: t -*-

;; Defined with `defsubst' rather than `define-inline': a `define-inline'
;; body has to return code via `inline-quote', and a quoted expansion of
;; this loop could capture caller variables named like its locals.
(defsubst mandelbrot1 (cx cy)
  "Return the contrast-adjusted iteration count for the point CX, CY.
Starting from z = 0, repeat z <- z^2 + c while the squared magnitude
of z is at most 4 and fewer than 256 iterations have run.  The
iteration count is then mapped through a 0.8 power curve scaled to
256 and floored to an integer."
  (let ((zr 0)
        (zi 0)
        (v 0))
    (while (and (< v 256)
                (<= (+ (* zr zr) (* zi zi)) 4))
      (setq v (1+ v))
      ;; The new real part is held in TMP because the update of ZI below
      ;; still needs the old ZR.
      (let ((tmp (+ (* zr zr) (- (* zi zi)) cx)))
        (setq zi (+ (* (* zr zi) 2) cy))
        (setq zr tmp)))
    ;; samples that hit 256 wrap in the graphic to display as zero
    ;; exponential 0.8 enhances contrast a bit
    (floor (* 256 (expt (/ v 256.0) 0.8)))))

(defun mandelbrot (&optional w h)
  "Compute a W by H Mandelbrot image with floating-point arithmetic.
W defaults to 1600 and H to 1200.  The sampled region starts at
x = -2.5, y = -1.5 and spans 4.0 units in x and 3.0 units in y.
Return a vector of W*H values from `mandelbrot1', stored row by row."
  (declare (speed 3))
  (interactive)
  ;; Resolve the optional arguments first: the output vector is sized from
  ;; them, so they must not still be nil when `make-vector' runs.
  (let* ((w (or w 1600))
         (h (or h 1200))
         (output (make-vector (* w h) 0.0))
         (idx 0)
         (x0 -2.5)
         (y0 -1.5)
         (dx 4.0)
         (dy 3.0)
         (dxp (/ dx w))
         (dyp (/ dy h)))
    (dotimes (y h)
      (let ((cy (+ y0 (* dyp y))))
        (dotimes (x w)
          (let ((v (mandelbrot1 (+ x0 (* dxp x)) cy)))
            (aset output idx v)
            (setq idx (1+ idx))))))
    output))

(defun mandelbrot-bench ()
  "Profile CPU and memory use of a 320x200 `mandelbrot' run.
Show the profiler report afterwards and print the computed vector
with `message'."
  (interactive)
  (let (output)
    (profiler-start 'cpu+mem)
    ;; Stop the profiler even if `mandelbrot' signals, so a failed run
    ;; does not leave it running.
    (unwind-protect
        (setq output (mandelbrot 320 200))
      (profiler-stop))
    (profiler-report)
    (message "%S" output)))

;;(mandelbrot 320 200)
;;(mandelbrot 640 480)
;;(mandelbrot 1024 768)
;;(mandelbrot)

;; (profiler-start 'mem)
;; (mandelbrot 320 200)
;; (profiler-stop)
;; (profiler-report)

;; (profiler-start 'cpu+mem)
;; (mandelbrot 320 200)
;; (profiler-stop)
;; (profiler-report)

;; (profiler-start 'cpu)
;; (mandelbrot 320 200)
;; (profiler-stop)
;; (profiler-report)

;;(profiler-start 'cpu+mem)
;;(mandelbrot)
;;(profiler-stop)
;;(profiler-report)

;; (benchmark-run 1 (mandelbrot))
;; (15.033354357 15 3.9534469799999954)
;;(120.40541861 1947 93.45048212499998)
;;(128.362728323 1942 93.44881820700004)

             reply	other threads:[~2024-01-27  9:25 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-27  9:25 Psionic K [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-01-19  9:19 Identifying sources of allocations in a toy Mandelbrot package Psionic K
2024-01-19 15:33 ` Tomas Hlavaty
2024-01-20  3:14   ` Psionic K
2024-01-20  3:37     ` Psionic K
2024-01-20  7:29     ` Eli Zaretskii
2024-01-20  9:09     ` Tomas Hlavaty
2024-01-20 10:03       ` Psionic K
2024-01-20 10:31         ` Eli Zaretskii
2024-01-26 23:36         ` Tomas Hlavaty
2024-01-27  1:07           ` Psionic K
2024-01-19 15:44 ` Eli Zaretskii
2024-01-17 12:39 Psionic K
2024-01-17 12:58 ` Eli Zaretskii
2024-01-17 13:25 ` Emanuel Berg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CADQMGAQJJuAutkpxuSQK-sjruqpw==nOpWU5amKAhjsi0egRHA@mail.gmail.com' \
    --to=psionik@positron.solutions \
    --cc=help-gnu-emacs@gnu.org \
    --cc=tom@logand.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).