1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 12 Jul 2017 20:43:09 +0200
Subject: [PATCH 1/2] Use cpuid 4 with subleafs to query L1 cache size on Intel processors
---
cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 102 insertions(+), 15 deletions(-)
diff --git a/cpuid_x86.c b/cpuid_x86.c
index ab2ecdca..73b4df6b 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
+{
+ int cpuInfo[4] = {-1};
+ __cpuidex(cpuInfo, op, count);
+ *eax = cpuInfo[0];
+ *ebx = cpuInfo[1];
+ *ecx = cpuInfo[2];
+ *edx = cpuInfo[3];
+}
+
#else
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
+
+static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
+#if defined(__i386__) && defined(__PIC__)
+ __asm__ __volatile__
+ ("mov %%ebx, %%edi;"
+ "cpuid;"
+ "xchgl %%ebx, %%edi;"
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
+#else
+ __asm__ __volatile__
+ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
+#endif
+}
#endif
#else
@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level > 1) {
-
+ int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);
-
+ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);
for (i = 0; i < 15; i++){
-
switch (info[i]){
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
- DTB.size = 2048;
- DTB.associative = 4;
- DTB.linesize = 32;
- LDTB.size = 4096;
- LDTB.associative= 4;
- LDTB.linesize = 32;
+ DTB.size = 2048;
+ DTB.associative = 4;
+ DTB.linesize = 32;
+ LDTB.size = 4096;
+ LDTB.associative= 4;
+ LDTB.linesize = 32;
+ break;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
- ITB.size = 2048;
- ITB.associative = 0;
- ITB.linesize = 8;
- LITB.size = 4096;
- LITB.associative= 0;
- LITB.linesize = 8;
+ ITB.size = 2048;
+ ITB.associative = 0;
+ ITB.linesize = 8;
+ LITB.size = 4096;
+ LITB.associative= 0;
+ LITB.linesize = 8;
+ break;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}
if (get_vendor() == VENDOR_INTEL) {
+ if(LD1.size<=0 || LC1.size<=0){
+ //If we didn't detect L1 correctly before, query cpuid leaf 4 (subleafs 0-3) for it
+ int count;
+ for (count=0;count <4;count++) {
+ cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
+ switch (eax &0x1f) {
+ case 0:
+ continue;
+ case 1:
+ case 3:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 data cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LD1.size = (assoc*part*lines*sets)/1024;
+ LD1.associative = assoc;
+ LD1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ case 2:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 instruction cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LC1.size = (assoc*part*lines*sets)/1024;
+ LC1.associative = assoc;
+ LC1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+
+ }
+ default:
+ break;
+ }
+ }
+ }
+
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){
--
2.11.0