unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
* [bug#29810] gnu: maths: Fix cache size detected by openblas on some
@ 2017-12-22 13:13 Dave Love
  2018-01-08  9:43 ` Ludovic Courtès
  0 siblings, 1 reply; 3+ messages in thread
From: Dave Love @ 2017-12-22 13:13 UTC (permalink / raw)
  To: 29810

[-- Attachment #1: Type: text/plain, Size: 275 bytes --]

This addresses a potential performance problem, fixed in the post-0.2.20
source.  It's intended for application to a package definition updated
to 0.2.20, which Ludo said is in the pipeline.  Apologies that I don't
seem to have converged on an acceptable style for changes.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-gnu-maths-Fix-cache-size-detected-by-openblas-on-som.patch --]
[-- Type: text/x-diff, Size: 9856 bytes --]

From 23ad3a438ef7bcd34e2354f6cbdede634f0188d4 Mon Sep 17 00:00:00 2001
From: Dave Love <fx@gnu.org>
Date: Fri, 22 Dec 2017 12:48:29 +0000
Subject: [PATCH 1/2] gnu: maths: Fix cache size detected by openblas on some
 x86_64.

* gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch,
gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch:
New files.
* gnu/packages/maths.scm (openblas)[source]: Use them.
* gnu/local.mk: Register them.
---
 gnu/local.mk                                       |   2 +
 gnu/packages/maths.scm                             |   6 +-
 ...mplementation-of-cpuid_count-for-the-CPUI.patch |  36 ++++
 ...-with-subleafs-to-query-L1-cache-size-on-.patch | 190 +++++++++++++++++++++
 4 files changed, 233 insertions(+), 1 deletion(-)
 create mode 100644 gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch
 create mode 100644 gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch

diff --git a/gnu/local.mk b/gnu/local.mk
index ac1c8ac2a..e69ccce34 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -930,6 +930,8 @@ dist_patch_DATA =						\
   %D%/packages/patches/omake-fix-non-determinism.patch	\
   %D%/packages/patches/ola-readdir-r.patch			\
   %D%/packages/patches/openscenegraph-ffmpeg3.patch             \
+  %D%/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch \
+  %D%/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch \
   %D%/packages/patches/openexr-missing-samples.patch		\
   %D%/packages/patches/openfoam-4.1-cleanup.patch			\
   %D%/packages/patches/openldap-CVE-2017-9287.patch		\
diff --git a/gnu/packages/maths.scm b/gnu/packages/maths.scm
index bb7dbee4b..8a48ec2fc 100644
--- a/gnu/packages/maths.scm
+++ b/gnu/packages/maths.scm
@@ -2629,7 +2629,11 @@ parts of it.")
        (file-name (string-append name "-" version ".tar.gz"))
        (sha256
         (base32
-         "1bd03c5xni0bla0wg1wba841b36b0sg13sjja955kn5xzvy4i61a"))))
+         "1bd03c5xni0bla0wg1wba841b36b0sg13sjja955kn5xzvy4i61a"))
+       (patches
+        (search-patches
+         "openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch"
+         "openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch"))))
     (build-system gnu-build-system)
     (arguments
      `(#:tests? #f  ;no "check" target
diff --git a/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch b/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch
new file mode 100644
index 000000000..d9cff2cf0
--- /dev/null
+++ b/gnu/packages/patches/openblas-Add-dummy-implementation-of-cpuid_count-for-the-CPUI.patch
@@ -0,0 +1,36 @@
+From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 12 Jul 2017 21:56:23 +0200
+Subject: [PATCH 2/2] Add dummy implementation of cpuid_count for the CPUIDEMU
+ case
+
+---
+ cpuid_x86.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/cpuid_x86.c b/cpuid_x86.c
+index 73b4df6b..103128a3 100644
+--- a/cpuid_x86.c
++++ b/cpuid_x86.c
+@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
+   *edx = idlist[current].d;
+ }
+ 
++void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
++  return cpuid (op, eax, ebx, ecx, edx);
++}
++
+ #endif
+ 
+ #endif // _MSC_VER
+@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+         }
+       }
+     }
+-
+     cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
+     if (cpuid_level >= 0x80000006) {
+       if(L2.size<=0){
+-- 
+2.11.0
+
diff --git a/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch b/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch
new file mode 100644
index 000000000..a58c08e23
--- /dev/null
+++ b/gnu/packages/patches/openblas-Use-cpuid-4-with-subleafs-to-query-L1-cache-size-on-.patch
@@ -0,0 +1,190 @@
+From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 12 Jul 2017 20:43:09 +0200
+Subject: [PATCH 1/2] Use cpuid 4 with subleafs to query L1 cache size on Intel
+ processors
+
+---
+ cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 102 insertions(+), 15 deletions(-)
+
+diff --git a/cpuid_x86.c b/cpuid_x86.c
+index ab2ecdca..73b4df6b 100644
+--- a/cpuid_x86.c
++++ b/cpuid_x86.c
+@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+   *edx = cpuInfo[3];
+ }
+ 
++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
++{
++  int cpuInfo[4] = {-1};
++  __cpuidex(cpuInfo, op, count);
++  *eax = cpuInfo[0];
++  *ebx = cpuInfo[1];
++  *ecx = cpuInfo[2];
++  *edx = cpuInfo[3];
++}
++
+ #else
+ 
+ #ifndef CPUIDEMU
+ 
+ #if defined(__APPLE__) && defined(__i386__)
+ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
+ #else
+ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
+ #if defined(__i386__) && defined(__PIC__)
+@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
+     ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
+ #endif
+ }
++
++static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
++#if defined(__i386__) && defined(__PIC__)
++  __asm__ __volatile__
++    ("mov %%ebx, %%edi;"
++     "cpuid;"
++     "xchgl %%ebx, %%edi;"
++     : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
++#else
++  __asm__ __volatile__
++    ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
++#endif
++}
+ #endif
+ 
+ #else
+@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+   cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
+ 
+   if (cpuid_level > 1) {
+-
++    int numcalls =0 ;
+     cpuid(2, &eax, &ebx, &ecx, &edx);
+-
++    numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
+     info[ 0] = BITMASK(eax,  8, 0xff);
+     info[ 1] = BITMASK(eax, 16, 0xff);
+     info[ 2] = BITMASK(eax, 24, 0xff);
+@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+     info[14] = BITMASK(edx, 24, 0xff);
+ 
+     for (i = 0; i < 15; i++){
+-
+       switch (info[i]){
+ 
+ 	/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
+@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+ 	LD1.linesize    = 64;
+ 	break;
+       case 0x63 :
+-  DTB.size        = 2048;
+-  DTB.associative = 4;
+-  DTB.linesize    = 32;
+-  LDTB.size       = 4096;
+-  LDTB.associative= 4;
+-  LDTB.linesize   = 32;
++  	DTB.size        = 2048;
++  	DTB.associative = 4;
++  	DTB.linesize    = 32;
++  	LDTB.size       = 4096;
++  	LDTB.associative= 4;
++  	LDTB.linesize   = 32;
++	break;
+       case 0x66 :
+ 	LD1.size        = 8;
+ 	LD1.associative = 4;
+@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+ 	LC1.associative = 8;
+ 	break;
+       case 0x76 :
+-  ITB.size        = 2048;
+-  ITB.associative = 0;
+-  ITB.linesize    = 8;
+-  LITB.size       = 4096;
+-  LITB.associative= 0;
+-  LITB.linesize   = 8;
++  	ITB.size        = 2048;
++  	ITB.associative = 0;
++  	ITB.linesize    = 8;
++  	LITB.size       = 4096;
++  	LITB.associative= 0;
++  	LITB.linesize   = 8;
++	break;
+       case 0x77 :
+ 	LC1.size        = 16;
+ 	LC1.associative = 4;
+@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
+   }
+ 
+   if (get_vendor() == VENDOR_INTEL) {
++      if(LD1.size<=0 || LC1.size<=0){
++	//If we didn't detect L1 correctly before,
++	int count;
++	for (count=0;count <4;count++) {
++	cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
++        switch (eax &0x1f) {
++        case 0:
++          continue;
++          case 1:
++          case 3:
++          {
++            switch ((eax >>5) &0x07)
++            {
++            case 1:
++            {
++//            fprintf(stderr,"L1 data cache...\n");
++            int sets = ecx+1;
++            int lines = (ebx & 0x0fff) +1;
++            ebx>>=12;
++            int part = (ebx&0x03ff)+1;
++            ebx >>=10;
++            int assoc = (ebx&0x03ff)+1;
++            LD1.size = (assoc*part*lines*sets)/1024;
++            LD1.associative = assoc;
++            LD1.linesize= lines;
++            break;
++            }
++            default: 
++              break;
++           }
++          break;
++          }
++         case 2:
++          {
++            switch ((eax >>5) &0x07)
++            {
++            case 1:
++            {
++//            fprintf(stderr,"L1 instruction cache...\n");
++            int sets = ecx+1;
++            int lines = (ebx & 0x0fff) +1;
++            ebx>>=12;
++            int part = (ebx&0x03ff)+1;
++            ebx >>=10;
++            int assoc = (ebx&0x03ff)+1;
++            LC1.size = (assoc*part*lines*sets)/1024;
++            LC1.associative = assoc;
++            LC1.linesize= lines;
++            break;
++            }
++            default: 
++              break;
++           }
++          break;
++          
++          }
++          default:
++          break;
++        }
++      }
++    }
++
+     cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
+     if (cpuid_level >= 0x80000006) {
+       if(L2.size<=0){
+-- 
+2.11.0
+
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-02-12 23:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-22 13:13 [bug#29810] gnu: maths: Fix cache size detected by openblas on some Dave Love
2018-01-08  9:43 ` Ludovic Courtès
2019-02-12 23:21   ` bug#29810: " Leo Famulari

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).