unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
blob d1ca35f7caea366ef5001ace1c8fe1d0a2a3334c 6269 bytes (raw)
name: gnu/packages/patches/glibc-2-26-0083.patch 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
 
From aa5be982ea1f59ee1e479fe9a561aeafd91a2261 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Thu, 16 Nov 2017 12:21:27 +0530
Subject: [PATCH 83/90] Use relaxed atomics for malloc have_fastchunks

Currently free typically uses 2 atomic operations per call.  The have_fastchunks
flag indicates whether there are recently freed blocks in the fastbins.  This
is purely an optimization to avoid calling malloc_consolidate too often and
avoiding the overhead of walking all fast bins even if all are empty during a
sequence of allocations.  However using catomic_or to update the flag is
completely unnecessary since it can be changed into a simple boolean and
accessed using relaxed atomics.  There is no change in multi-threaded behaviour
given the flag is already approximate (it may be set when there are no blocks in
any fast bins, or it may be clear when there are free blocks that could be
consolidated).

Performance of malloc/free improves by 27% on a simple benchmark on AArch64
(both single and multithreaded). The number of load/store exclusive instructions
is reduced by 33%. Bench-malloc-thread speeds up by ~3% in all cases.

	* malloc/malloc.c (FASTCHUNKS_BIT): Remove.
	(have_fastchunks): Remove.
	(clear_fastchunks): Remove.
	(set_fastchunks): Remove.
	(malloc_state): Add have_fastchunks.
	(malloc_init_state): Use have_fastchunks.
	(do_check_malloc_state): Remove incorrect invariant checks.
	(_int_malloc): Use have_fastchunks.
	(_int_free): Likewise.
	(malloc_consolidate): Likewise.

(cherry-picked from e956075a5a2044d05ce48b905b10270ed4a63e87)

diff --git a/ChangeLog b/ChangeLog
index 192acbf7d5..67d3503afe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* malloc/malloc.c (FASTCHUNKS_BIT): Remove.
+	(have_fastchunks): Remove.
+	(clear_fastchunks): Remove.
+	(set_fastchunks): Remove.
+	(malloc_state): Add have_fastchunks.
+	(malloc_init_state): Use have_fastchunks.
+	(do_check_malloc_state): Remove incorrect invariant checks.
+	(_int_malloc): Use have_fastchunks.
+	(_int_free): Likewise.
+	(malloc_consolidate): Likewise.
+
 2017-10-17  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* malloc/malloc.c (tcache_put): Inline.
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 546579bce4..37e3c44e72 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1612,27 +1612,6 @@ typedef struct malloc_chunk *mfastbinptr;
 
 #define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
 
-/*
-   Since the lowest 2 bits in max_fast don't matter in size comparisons,
-   they are used as flags.
- */
-
-/*
-   FASTCHUNKS_BIT held in max_fast indicates that there are probably
-   some fastbin chunks. It is set true on entering a chunk into any
-   fastbin, and cleared only in malloc_consolidate.
-
-   The truth value is inverted so that have_fastchunks will be true
-   upon startup (since statics are zero-filled), simplifying
-   initialization checks.
- */
-
-#define FASTCHUNKS_BIT        (1U)
-
-#define have_fastchunks(M)     (((M)->flags & FASTCHUNKS_BIT) == 0)
-#define clear_fastchunks(M)    catomic_or (&(M)->flags, FASTCHUNKS_BIT)
-#define set_fastchunks(M)      catomic_and (&(M)->flags, ~FASTCHUNKS_BIT)
-
 /*
    NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
    regions.  Otherwise, contiguity is exploited in merging together,
@@ -1690,6 +1669,17 @@ get_max_fast (void)
    ----------- Internal state representation and initialization -----------
  */
 
+/*
+   have_fastchunks indicates that there are probably some fastbin chunks.
+   It is set true on entering a chunk into any fastbin, and cleared early in
+   malloc_consolidate.  The value is approximate since it may be set when there
+   are no fastbin chunks, or it may be clear even if there are fastbin chunks
+   available.  Given it's sole purpose is to reduce number of redundant calls to
+   malloc_consolidate, it does not affect correctness.  As a result we can safely
+   use relaxed atomic accesses.
+ */
+
+
 struct malloc_state
 {
   /* Serialize access.  */
@@ -1698,6 +1688,9 @@ struct malloc_state
   /* Flags (formerly in max_fast).  */
   int flags;
 
+  /* Set if the fastbin chunks contain recently inserted free blocks.  */
+  bool have_fastchunks;
+
   /* Fastbins */
   mfastbinptr fastbinsY[NFASTBINS];
 
@@ -1841,7 +1834,7 @@ malloc_init_state (mstate av)
   set_noncontiguous (av);
   if (av == &main_arena)
     set_max_fast (DEFAULT_MXFAST);
-  av->flags |= FASTCHUNKS_BIT;
+  atomic_store_relaxed (&av->have_fastchunks, false);
 
   av->top = initial_top (av);
 }
@@ -2206,11 +2199,6 @@ do_check_malloc_state (mstate av)
         }
     }
 
-  if (total != 0)
-    assert (have_fastchunks (av));
-  else if (!have_fastchunks (av))
-    assert (total == 0);
-
   /* check normal bins */
   for (i = 1; i < NBINS; ++i)
     {
@@ -3701,7 +3689,7 @@ _int_malloc (mstate av, size_t bytes)
   else
     {
       idx = largebin_index (nb);
-      if (have_fastchunks (av))
+      if (atomic_load_relaxed (&av->have_fastchunks))
         malloc_consolidate (av);
     }
 
@@ -4116,7 +4104,7 @@ _int_malloc (mstate av, size_t bytes)
 
       /* When we are using atomic ops to free fast chunks we can get
          here for all block sizes.  */
-      else if (have_fastchunks (av))
+      else if (atomic_load_relaxed (&av->have_fastchunks))
         {
           malloc_consolidate (av);
           /* restore original bin index */
@@ -4242,7 +4230,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 
     free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
 
-    set_fastchunks(av);
+    atomic_store_relaxed (&av->have_fastchunks, true);
     unsigned int idx = fastbin_index(size);
     fb = &fastbin (av, idx);
 
@@ -4393,7 +4381,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     */
 
     if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
-      if (have_fastchunks(av))
+      if (atomic_load_relaxed (&av->have_fastchunks))
 	malloc_consolidate(av);
 
       if (av == &main_arena) {
@@ -4464,7 +4452,7 @@ static void malloc_consolidate(mstate av)
   */
 
   if (get_max_fast () != 0) {
-    clear_fastchunks(av);
+    atomic_store_relaxed (&av->have_fastchunks, false);
 
     unsorted_bin = unsorted_chunks(av);
 

debug log:

solving d1ca35f7c ...
found d1ca35f7c in https://yhetil.org/guix-patches/87ine0pjiu.fsf@fastmail.com/ ||
	https://yhetil.org/guix-patches/87d148pe57.fsf@fastmail.com/

applying [1/1] https://yhetil.org/guix-patches/87ine0pjiu.fsf@fastmail.com/
diff --git a/gnu/packages/patches/glibc-2-26-0083.patch b/gnu/packages/patches/glibc-2-26-0083.patch
new file mode 100644
index 000000000..d1ca35f7c

1:59: trailing whitespace.
 
1:60: space before tab in indent.
 	* malloc/malloc.c (tcache_put): Inline.
1:66: trailing whitespace.
 
1:68: trailing whitespace.
 
1:96: trailing whitespace.
 
Checking patch gnu/packages/patches/glibc-2-26-0083.patch...
Applied patch gnu/packages/patches/glibc-2-26-0083.patch cleanly.
warning: squelched 16 whitespace errors
warning: 21 lines add whitespace errors.

skipping https://yhetil.org/guix-patches/87d148pe57.fsf@fastmail.com/ for d1ca35f7c
index at:
100644 d1ca35f7caea366ef5001ace1c8fe1d0a2a3334c	gnu/packages/patches/glibc-2-26-0083.patch

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).