xref: /linux/arch/x86/kernel/cpu/cacheinfo.c (revision ffc92cf3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/cpuhotplug.h>
15 #include <linux/sched.h>
16 #include <linux/capability.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
19 #include <linux/stop_machine.h>
20 
21 #include <asm/cpufeature.h>
22 #include <asm/cacheinfo.h>
23 #include <asm/amd_nb.h>
24 #include <asm/smp.h>
25 #include <asm/mtrr.h>
26 #include <asm/tlbflush.h>
27 
28 #include "cpu.h"
29 
/* Cache classes recorded in _cache_table.cache_type for CPUID(2) descriptors. */
30 #define LVL_1_INST	1
31 #define LVL_1_DATA	2
32 #define LVL_2		3
33 #define LVL_3		4
34 #define LVL_TRACE	5
35 
36 /* Shared last level cache maps */
37 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
38 
39 /* Shared L2 cache maps */
40 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
41 
/* NOTE(review): the users of this mask are outside the visible part of the file. */
42 static cpumask_var_t cpu_cacheinfo_mask;
43 
44 /* Kernel controls MTRR and/or PAT MSRs. */
45 unsigned int memory_caching_control __ro_after_init;
46 
/*
 * One CPUID(2) cache descriptor: the descriptor byte, the cache class
 * (one of the LVL_* values) it describes, and the cache size in KB.
 */
47 struct _cache_table {
48 	unsigned char descriptor;
49 	char cache_type;
50 	short size;
51 };
52 
/* Express a size given in megabytes in the KB units used by _cache_table.size. */
53 #define MB(x)	((x) * 1024)
54 
55 /*
 * The CPUID(2) cache descriptors we care about: L1/L2/L3 and trace cache
 * entries, but no TLB descriptors. The table is terminated by an all-zero
 * entry, which the lookup loop in init_intel_cacheinfo() relies on.
 */
57 
58 static const struct _cache_table cache_table[] =
59 {
60 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
61 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
62 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
63 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
64 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
65 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
66 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
67 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
68 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
69 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
70 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
71 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
72 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
73 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
74 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
75 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
76 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
77 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
78 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
79 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
80 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
81 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
82 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
83 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
84 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
85 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
86 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
87 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
88 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
89 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
90 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
91 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
92 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
93 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
94 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
95 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
96 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
97 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
98 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
99 	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
100 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
101 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
102 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
103 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
104 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
105 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
106 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
107 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
108 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
109 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
110 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
111 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
112 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
113 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
114 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
115 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
116 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
117 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
118 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
119 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
120 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
121 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
122 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
123 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
124 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
125 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
126 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
127 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
128 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
129 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
130 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
131 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
132 	{ 0x00, 0, 0}
133 };
134 
135 
/* Values of the 'type' field of the CPUID(4)-style EAX output. */
136 enum _cache_type {
137 	CTYPE_NULL = 0,
138 	CTYPE_DATA = 1,
139 	CTYPE_INST = 2,
140 	CTYPE_UNIFIED = 3
141 };
142 
/*
 * Bit-field view of the EAX output of CPUID(4) / CPUID(0x8000001d).
 * num_threads_sharing is stored minus one; users add 1 before using it.
 */
143 union _cpuid4_leaf_eax {
144 	struct {
145 		enum _cache_type	type:5;
146 		unsigned int		level:3;
147 		unsigned int		is_self_initializing:1;
148 		unsigned int		is_fully_associative:1;
149 		unsigned int		reserved:4;
150 		unsigned int		num_threads_sharing:12;
151 		unsigned int		num_cores_on_die:6;
152 	} split;
153 	u32 full;
154 };
155 
/*
 * Bit-field view of the EBX output. All three fields are stored minus one
 * (see the "+ 1" adjustments in cpuid4_cache_lookup_regs() and ci_leaf_init()).
 */
156 union _cpuid4_leaf_ebx {
157 	struct {
158 		unsigned int		coherency_line_size:12;
159 		unsigned int		physical_line_partition:10;
160 		unsigned int		ways_of_associativity:10;
161 	} split;
162 	u32 full;
163 };
164 
/* View of the ECX output: the number of sets, stored minus one. */
165 union _cpuid4_leaf_ecx {
166 	struct {
167 		unsigned int		number_of_sets:32;
168 	} split;
169 	u32 full;
170 };
171 
/*
 * Decoded description of one cache leaf.
 *
 * eax/ebx/ecx: raw register values as filled in by cpuid4_cache_lookup_regs().
 * id:   copied to cacheinfo.id by ci_leaf_init().
 * size: product of sets, line size, line partitions and ways.
 * nb:   AMD northbridge owning the L3, set by amd_init_l3_cache(); NULL otherwise.
 */
172 struct _cpuid4_info_regs {
173 	union _cpuid4_leaf_eax eax;
174 	union _cpuid4_leaf_ebx ebx;
175 	union _cpuid4_leaf_ecx ecx;
176 	unsigned int id;
177 	unsigned long size;
178 	struct amd_northbridge *nb;
179 };
180 
/* Number of cache leaves; set by the init_{amd,hygon,intel}_cacheinfo() routines. */
181 static unsigned short num_cache_leaves;
182 
183 /* AMD doesn't have CPUID4. Emulate it here to report the same
184    information to the user.  This makes some assumptions about the machine:
185    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
186 
187    In theory the TLBs could be reported as fake type (they are in "dummy").
188    Maybe later */
/* Layout of the L1 descriptors read from CPUID(0x80000005) ECX/EDX by amd_cpuid4(). */
189 union l1_cache {
190 	struct {
191 		unsigned line_size:8;
192 		unsigned lines_per_tag:8;
193 		unsigned assoc:8;
194 		unsigned size_in_kb:8;
195 	};
196 	unsigned val;
197 };
198 
/* Layout of the L2 descriptor read from CPUID(0x80000006) ECX by amd_cpuid4(). */
199 union l2_cache {
200 	struct {
201 		unsigned line_size:8;
202 		unsigned lines_per_tag:4;
203 		unsigned assoc:4;
204 		unsigned size_in_kb:16;
205 	};
206 	unsigned val;
207 };
208 
/*
 * Layout of the L3 descriptor read from CPUID(0x80000006) EDX by amd_cpuid4().
 * size_encoded is multiplied by 512 there to obtain the size in KB.
 */
209 union l3_cache {
210 	struct {
211 		unsigned line_size:8;
212 		unsigned lines_per_tag:4;
213 		unsigned assoc:4;
214 		unsigned res:2;
215 		unsigned size_encoded:14;
216 	};
217 	unsigned val;
218 };
219 
/*
 * Maps an encoded 4-bit associativity field to a number of ways. Encodings
 * without an initializer map to 0.
 */
220 static const unsigned short assocs[] = {
221 	[1] = 1,
222 	[2] = 2,
223 	[4] = 4,
224 	[6] = 8,
225 	[8] = 16,
226 	[0xa] = 32,
227 	[0xb] = 48,
228 	[0xc] = 64,
229 	[0xd] = 96,
230 	[0xe] = 128,
231 	[0xf] = 0xffff /* fully associative - no way to show this currently */
232 };
233 
/* Cache level and cache type reported for emulated leaves 0..3, indexed by leaf. */
234 static const unsigned char levels[] = { 1, 1, 2, 3 };
235 static const unsigned char types[] = { 1, 2, 3, 3 };
236 
/* Translates the CPUID cache type into the generic cacheinfo cache type. */
237 static const enum cache_type cache_type_map[] = {
238 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
239 	[CTYPE_DATA] = CACHE_TYPE_DATA,
240 	[CTYPE_INST] = CACHE_TYPE_INST,
241 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
242 };
243 
244 static void
amd_cpuid4(int leaf,union _cpuid4_leaf_eax * eax,union _cpuid4_leaf_ebx * ebx,union _cpuid4_leaf_ecx * ecx)245 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
246 		     union _cpuid4_leaf_ebx *ebx,
247 		     union _cpuid4_leaf_ecx *ecx)
248 {
249 	unsigned dummy;
250 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
251 	union l1_cache l1i, l1d;
252 	union l2_cache l2;
253 	union l3_cache l3;
254 	union l1_cache *l1 = &l1d;
255 
256 	eax->full = 0;
257 	ebx->full = 0;
258 	ecx->full = 0;
259 
260 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
261 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
262 
263 	switch (leaf) {
264 	case 1:
265 		l1 = &l1i;
266 		fallthrough;
267 	case 0:
268 		if (!l1->val)
269 			return;
270 		assoc = assocs[l1->assoc];
271 		line_size = l1->line_size;
272 		lines_per_tag = l1->lines_per_tag;
273 		size_in_kb = l1->size_in_kb;
274 		break;
275 	case 2:
276 		if (!l2.val)
277 			return;
278 		assoc = assocs[l2.assoc];
279 		line_size = l2.line_size;
280 		lines_per_tag = l2.lines_per_tag;
281 		/* cpu_data has errata corrections for K7 applied */
282 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
283 		break;
284 	case 3:
285 		if (!l3.val)
286 			return;
287 		assoc = assocs[l3.assoc];
288 		line_size = l3.line_size;
289 		lines_per_tag = l3.lines_per_tag;
290 		size_in_kb = l3.size_encoded * 512;
291 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
292 			size_in_kb = size_in_kb >> 1;
293 			assoc = assoc >> 1;
294 		}
295 		break;
296 	default:
297 		return;
298 	}
299 
300 	eax->split.is_self_initializing = 1;
301 	eax->split.type = types[leaf];
302 	eax->split.level = levels[leaf];
303 	eax->split.num_threads_sharing = 0;
304 	eax->split.num_cores_on_die = topology_num_cores_per_package();
305 
306 
307 	if (assoc == 0xffff)
308 		eax->split.is_fully_associative = 1;
309 	ebx->split.coherency_line_size = line_size - 1;
310 	ebx->split.ways_of_associativity = assoc - 1;
311 	ebx->split.physical_line_partition = lines_per_tag - 1;
312 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
313 		(ebx->split.ways_of_associativity + 1) - 1;
314 }
315 
316 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
317 
318 /*
319  * L3 cache descriptors
320  */
amd_calc_l3_indices(struct amd_northbridge * nb)321 static void amd_calc_l3_indices(struct amd_northbridge *nb)
322 {
323 	struct amd_l3_cache *l3 = &nb->l3_cache;
324 	unsigned int sc0, sc1, sc2, sc3;
325 	u32 val = 0;
326 
327 	pci_read_config_dword(nb->misc, 0x1C4, &val);
328 
329 	/* calculate subcache sizes */
330 	l3->subcaches[0] = sc0 = !(val & BIT(0));
331 	l3->subcaches[1] = sc1 = !(val & BIT(4));
332 
333 	if (boot_cpu_data.x86 == 0x15) {
334 		l3->subcaches[0] = sc0 += !(val & BIT(1));
335 		l3->subcaches[1] = sc1 += !(val & BIT(5));
336 	}
337 
338 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
339 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
340 
341 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
342 }
343 
344 /*
345  * check whether a slot used for disabling an L3 index is occupied.
346  * @l3: L3 cache descriptor
347  * @slot: slot number (0..1)
348  *
349  * @returns: the disabled index if used or negative value if slot free.
350  */
amd_get_l3_disable_slot(struct amd_northbridge * nb,unsigned slot)351 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
352 {
353 	unsigned int reg = 0;
354 
355 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
356 
357 	/* check whether this slot is activated already */
358 	if (reg & (3UL << 30))
359 		return reg & 0xfff;
360 
361 	return -1;
362 }
363 
show_cache_disable(struct cacheinfo * this_leaf,char * buf,unsigned int slot)364 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
365 				  unsigned int slot)
366 {
367 	int index;
368 	struct amd_northbridge *nb = this_leaf->priv;
369 
370 	index = amd_get_l3_disable_slot(nb, slot);
371 	if (index >= 0)
372 		return sprintf(buf, "%d\n", index);
373 
374 	return sprintf(buf, "FREE\n");
375 }
376 
377 #define SHOW_CACHE_DISABLE(slot)					\
378 static ssize_t								\
379 cache_disable_##slot##_show(struct device *dev,				\
380 			    struct device_attribute *attr, char *buf)	\
381 {									\
382 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
383 	return show_cache_disable(this_leaf, buf, slot);		\
384 }
385 SHOW_CACHE_DISABLE(0)
386 SHOW_CACHE_DISABLE(1)
387 
amd_l3_disable_index(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long idx)388 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
389 				 unsigned slot, unsigned long idx)
390 {
391 	int i;
392 
393 	idx |= BIT(30);
394 
395 	/*
396 	 *  disable index in all 4 subcaches
397 	 */
398 	for (i = 0; i < 4; i++) {
399 		u32 reg = idx | (i << 20);
400 
401 		if (!nb->l3_cache.subcaches[i])
402 			continue;
403 
404 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
405 
406 		/*
407 		 * We need to WBINVD on a core on the node containing the L3
408 		 * cache which indices we disable therefore a simple wbinvd()
409 		 * is not sufficient.
410 		 */
411 		wbinvd_on_cpu(cpu);
412 
413 		reg |= BIT(31);
414 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
415 	}
416 }
417 
418 /*
419  * disable a L3 cache index by using a disable-slot
420  *
421  * @l3:    L3 cache descriptor
422  * @cpu:   A CPU on the node containing the L3 cache
423  * @slot:  slot number (0..1)
424  * @index: index to disable
425  *
426  * @return: 0 on success, error status on failure
427  */
amd_set_l3_disable_slot(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long index)428 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
429 			    unsigned slot, unsigned long index)
430 {
431 	int ret = 0;
432 
433 	/*  check if @slot is already used or the index is already disabled */
434 	ret = amd_get_l3_disable_slot(nb, slot);
435 	if (ret >= 0)
436 		return -EEXIST;
437 
438 	if (index > nb->l3_cache.indices)
439 		return -EINVAL;
440 
441 	/* check whether the other slot has disabled the same index already */
442 	if (index == amd_get_l3_disable_slot(nb, !slot))
443 		return -EEXIST;
444 
445 	amd_l3_disable_index(nb, cpu, slot, index);
446 
447 	return 0;
448 }
449 
store_cache_disable(struct cacheinfo * this_leaf,const char * buf,size_t count,unsigned int slot)450 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
451 				   const char *buf, size_t count,
452 				   unsigned int slot)
453 {
454 	unsigned long val = 0;
455 	int cpu, err = 0;
456 	struct amd_northbridge *nb = this_leaf->priv;
457 
458 	if (!capable(CAP_SYS_ADMIN))
459 		return -EPERM;
460 
461 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
462 
463 	if (kstrtoul(buf, 10, &val) < 0)
464 		return -EINVAL;
465 
466 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
467 	if (err) {
468 		if (err == -EEXIST)
469 			pr_warn("L3 slot %d in use/index already disabled!\n",
470 				   slot);
471 		return err;
472 	}
473 	return count;
474 }
475 
476 #define STORE_CACHE_DISABLE(slot)					\
477 static ssize_t								\
478 cache_disable_##slot##_store(struct device *dev,			\
479 			     struct device_attribute *attr,		\
480 			     const char *buf, size_t count)		\
481 {									\
482 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
483 	return store_cache_disable(this_leaf, buf, count, slot);	\
484 }
485 STORE_CACHE_DISABLE(0)
486 STORE_CACHE_DISABLE(1)
487 
subcaches_show(struct device * dev,struct device_attribute * attr,char * buf)488 static ssize_t subcaches_show(struct device *dev,
489 			      struct device_attribute *attr, char *buf)
490 {
491 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
492 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
493 
494 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
495 }
496 
subcaches_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)497 static ssize_t subcaches_store(struct device *dev,
498 			       struct device_attribute *attr,
499 			       const char *buf, size_t count)
500 {
501 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
502 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
503 	unsigned long val;
504 
505 	if (!capable(CAP_SYS_ADMIN))
506 		return -EPERM;
507 
508 	if (kstrtoul(buf, 16, &val) < 0)
509 		return -EINVAL;
510 
511 	if (amd_set_subcaches(cpu, val))
512 		return -EINVAL;
513 
514 	return count;
515 }
516 
/*
 * Read/write device attributes; they are referenced below as
 * dev_attr_cache_disable_0, dev_attr_cache_disable_1 and dev_attr_subcaches.
 */
517 static DEVICE_ATTR_RW(cache_disable_0);
518 static DEVICE_ATTR_RW(cache_disable_1);
519 static DEVICE_ATTR_RW(subcaches);
520 
521 static umode_t
cache_private_attrs_is_visible(struct kobject * kobj,struct attribute * attr,int unused)522 cache_private_attrs_is_visible(struct kobject *kobj,
523 			       struct attribute *attr, int unused)
524 {
525 	struct device *dev = kobj_to_dev(kobj);
526 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
527 	umode_t mode = attr->mode;
528 
529 	if (!this_leaf->priv)
530 		return 0;
531 
532 	if ((attr == &dev_attr_subcaches.attr) &&
533 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 		return mode;
535 
536 	if ((attr == &dev_attr_cache_disable_0.attr ||
537 	     attr == &dev_attr_cache_disable_1.attr) &&
538 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
539 		return mode;
540 
541 	return 0;
542 }
543 
/*
 * Attribute group returned by cache_get_priv_group(). Its .attrs array is
 * filled in at run time by init_amd_l3_attrs().
 */
544 static struct attribute_group cache_private_group = {
545 	.is_visible = cache_private_attrs_is_visible,
546 };
547 
init_amd_l3_attrs(void)548 static void init_amd_l3_attrs(void)
549 {
550 	int n = 1;
551 	static struct attribute **amd_l3_attrs;
552 
553 	if (amd_l3_attrs) /* already initialized */
554 		return;
555 
556 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
557 		n += 2;
558 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
559 		n += 1;
560 
561 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
562 	if (!amd_l3_attrs)
563 		return;
564 
565 	n = 0;
566 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
567 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
568 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
569 	}
570 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
571 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
572 
573 	cache_private_group.attrs = amd_l3_attrs;
574 }
575 
576 const struct attribute_group *
cache_get_priv_group(struct cacheinfo * this_leaf)577 cache_get_priv_group(struct cacheinfo *this_leaf)
578 {
579 	struct amd_northbridge *nb = this_leaf->priv;
580 
581 	if (this_leaf->level < 3 || !nb)
582 		return NULL;
583 
584 	if (nb && nb->l3_cache.indices)
585 		init_amd_l3_attrs();
586 
587 	return &cache_private_group;
588 }
589 
amd_init_l3_cache(struct _cpuid4_info_regs * this_leaf,int index)590 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
591 {
592 	int node;
593 
594 	/* only for L3, and not in virtualized environments */
595 	if (index < 3)
596 		return;
597 
598 	node = topology_amd_node_id(smp_processor_id());
599 	this_leaf->nb = node_to_amd_nb(node);
600 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
601 		amd_calc_l3_indices(this_leaf->nb);
602 }
603 #else
/* Empty stub used when CONFIG_AMD_NB or CONFIG_SYSFS is not enabled. */
604 #define amd_init_l3_cache(x, y)
605 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
606 
607 static int
cpuid4_cache_lookup_regs(int index,struct _cpuid4_info_regs * this_leaf)608 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
609 {
610 	union _cpuid4_leaf_eax	eax;
611 	union _cpuid4_leaf_ebx	ebx;
612 	union _cpuid4_leaf_ecx	ecx;
613 	unsigned		edx;
614 
615 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
616 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
617 			cpuid_count(0x8000001d, index, &eax.full,
618 				    &ebx.full, &ecx.full, &edx);
619 		else
620 			amd_cpuid4(index, &eax, &ebx, &ecx);
621 		amd_init_l3_cache(this_leaf, index);
622 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
623 		cpuid_count(0x8000001d, index, &eax.full,
624 			    &ebx.full, &ecx.full, &edx);
625 		amd_init_l3_cache(this_leaf, index);
626 	} else {
627 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
628 	}
629 
630 	if (eax.split.type == CTYPE_NULL)
631 		return -EIO; /* better error ? */
632 
633 	this_leaf->eax = eax;
634 	this_leaf->ebx = ebx;
635 	this_leaf->ecx = ecx;
636 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
637 			  (ebx.split.coherency_line_size     + 1) *
638 			  (ebx.split.physical_line_partition + 1) *
639 			  (ebx.split.ways_of_associativity   + 1);
640 	return 0;
641 }
642 
find_num_cache_leaves(struct cpuinfo_x86 * c)643 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
644 {
645 	unsigned int		eax, ebx, ecx, edx, op;
646 	union _cpuid4_leaf_eax	cache_eax;
647 	int 			i = -1;
648 
649 	if (c->x86_vendor == X86_VENDOR_AMD ||
650 	    c->x86_vendor == X86_VENDOR_HYGON)
651 		op = 0x8000001d;
652 	else
653 		op = 4;
654 
655 	do {
656 		++i;
657 		/* Do cpuid(op) loop to find out num_cache_leaves */
658 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
659 		cache_eax.full = eax;
660 	} while (cache_eax.split.type != CTYPE_NULL);
661 	return i;
662 }
663 
/*
 * Set c->topo.llc_id on AMD CPUs.
 *
 * Nothing is done if CPUID(0x80000006) EDX is zero, i.e. no L3 is reported.
 * Otherwise the ID is @die_id for families below 0x17, ApicId >> 3 for
 * family 0x17 models up to 0x1F, and for everything else the APIC ID shifted
 * by the order of the number of threads sharing the last cache leaf.
 */
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	u32 eax, ebx, ecx, edx;
	u32 last_leaf, sharers;

	/*
	 * Multiple LLCs are only possible when L3 caches exist, so look at
	 * the L3 cache CPUID leaf first.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		c->topo.llc_id = die_id;
		return;
	}

	if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
		return;
	}

	/* Derive the LLC ID from the number of threads sharing the cache. */
	last_leaf = find_num_cache_leaves(c) - 1;
	cpuid_count(0x8000001d, last_leaf, &eax, &ebx, &ecx, &edx);
	if (!eax)
		return;

	sharers = ((eax >> 14) & 0xfff) + 1;
	c->topo.llc_id = c->topo.apicid >> get_count_order(sharers);
}
701 
cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 * c)702 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
703 {
704 	/*
705 	 * We may have multiple LLCs if L3 caches exist, so check if we
706 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
707 	 */
708 	if (!cpuid_edx(0x80000006))
709 		return;
710 
711 	/*
712 	 * LLC is at the core complex level.
713 	 * Core complex ID is ApicId[3] for these processors.
714 	 */
715 	c->topo.llc_id = c->topo.apicid >> 3;
716 }
717 
init_amd_cacheinfo(struct cpuinfo_x86 * c)718 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
719 {
720 
721 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
722 		num_cache_leaves = find_num_cache_leaves(c);
723 	} else if (c->extended_cpuid_level >= 0x80000006) {
724 		if (cpuid_edx(0x80000006) & 0xf000)
725 			num_cache_leaves = 4;
726 		else
727 			num_cache_leaves = 3;
728 	}
729 }
730 
init_hygon_cacheinfo(struct cpuinfo_x86 * c)731 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
732 {
733 	num_cache_leaves = find_num_cache_leaves(c);
734 }
735 
init_intel_cacheinfo(struct cpuinfo_x86 * c)736 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
737 {
738 	/* Cache sizes */
739 	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
740 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
741 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
742 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
743 
744 	if (c->cpuid_level > 3) {
745 		static int is_initialized;
746 
747 		if (is_initialized == 0) {
748 			/* Init num_cache_leaves from boot CPU */
749 			num_cache_leaves = find_num_cache_leaves(c);
750 			is_initialized++;
751 		}
752 
753 		/*
754 		 * Whenever possible use cpuid(4), deterministic cache
755 		 * parameters cpuid leaf to find the cache details
756 		 */
757 		for (i = 0; i < num_cache_leaves; i++) {
758 			struct _cpuid4_info_regs this_leaf = {};
759 			int retval;
760 
761 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
762 			if (retval < 0)
763 				continue;
764 
765 			switch (this_leaf.eax.split.level) {
766 			case 1:
767 				if (this_leaf.eax.split.type == CTYPE_DATA)
768 					new_l1d = this_leaf.size/1024;
769 				else if (this_leaf.eax.split.type == CTYPE_INST)
770 					new_l1i = this_leaf.size/1024;
771 				break;
772 			case 2:
773 				new_l2 = this_leaf.size/1024;
774 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
775 				index_msb = get_count_order(num_threads_sharing);
776 				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
777 				break;
778 			case 3:
779 				new_l3 = this_leaf.size/1024;
780 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
781 				index_msb = get_count_order(num_threads_sharing);
782 				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
783 				break;
784 			default:
785 				break;
786 			}
787 		}
788 	}
789 	/*
790 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
791 	 * trace cache
792 	 */
793 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
794 		/* supports eax=2  call */
795 		int j, n;
796 		unsigned int regs[4];
797 		unsigned char *dp = (unsigned char *)regs;
798 		int only_trace = 0;
799 
800 		if (num_cache_leaves != 0 && c->x86 == 15)
801 			only_trace = 1;
802 
803 		/* Number of times to iterate */
804 		n = cpuid_eax(2) & 0xFF;
805 
806 		for (i = 0 ; i < n ; i++) {
807 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
808 
809 			/* If bit 31 is set, this is an unknown format */
810 			for (j = 0 ; j < 3 ; j++)
811 				if (regs[j] & (1 << 31))
812 					regs[j] = 0;
813 
814 			/* Byte 0 is level count, not a descriptor */
815 			for (j = 1 ; j < 16 ; j++) {
816 				unsigned char des = dp[j];
817 				unsigned char k = 0;
818 
819 				/* look up this descriptor in the table */
820 				while (cache_table[k].descriptor != 0) {
821 					if (cache_table[k].descriptor == des) {
822 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
823 							break;
824 						switch (cache_table[k].cache_type) {
825 						case LVL_1_INST:
826 							l1i += cache_table[k].size;
827 							break;
828 						case LVL_1_DATA:
829 							l1d += cache_table[k].size;
830 							break;
831 						case LVL_2:
832 							l2 += cache_table[k].size;
833 							break;
834 						case LVL_3:
835 							l3 += cache_table[k].size;
836 							break;
837 						}
838 
839 						break;
840 					}
841 
842 					k++;
843 				}
844 			}
845 		}
846 	}
847 
848 	if (new_l1d)
849 		l1d = new_l1d;
850 
851 	if (new_l1i)
852 		l1i = new_l1i;
853 
854 	if (new_l2) {
855 		l2 = new_l2;
856 		c->topo.llc_id = l2_id;
857 		c->topo.l2c_id = l2_id;
858 	}
859 
860 	if (new_l3) {
861 		l3 = new_l3;
862 		c->topo.llc_id = l3_id;
863 	}
864 
865 	/*
866 	 * If llc_id is not yet set, this means cpuid_level < 4 which in
867 	 * turns means that the only possibility is SMT (as indicated in
868 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
869 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
870 	 * c->topo.pkg_id.
871 	 */
872 	if (c->topo.llc_id == BAD_APICID)
873 		c->topo.llc_id = c->topo.pkg_id;
874 
875 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
876 
877 	if (!l2)
878 		cpu_detect_cache_sizes(c);
879 }
880 
__cache_amd_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)881 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
882 				    struct _cpuid4_info_regs *base)
883 {
884 	struct cpu_cacheinfo *this_cpu_ci;
885 	struct cacheinfo *this_leaf;
886 	int i, sibling;
887 
888 	/*
889 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
890 	 * to derive shared_cpu_map.
891 	 */
892 	if (index == 3) {
893 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
894 			this_cpu_ci = get_cpu_cacheinfo(i);
895 			if (!this_cpu_ci->info_list)
896 				continue;
897 			this_leaf = this_cpu_ci->info_list + index;
898 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
899 				if (!cpu_online(sibling))
900 					continue;
901 				cpumask_set_cpu(sibling,
902 						&this_leaf->shared_cpu_map);
903 			}
904 		}
905 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
906 		unsigned int apicid, nshared, first, last;
907 
908 		nshared = base->eax.split.num_threads_sharing + 1;
909 		apicid = cpu_data(cpu).topo.apicid;
910 		first = apicid - (apicid % nshared);
911 		last = first + nshared - 1;
912 
913 		for_each_online_cpu(i) {
914 			this_cpu_ci = get_cpu_cacheinfo(i);
915 			if (!this_cpu_ci->info_list)
916 				continue;
917 
918 			apicid = cpu_data(i).topo.apicid;
919 			if ((apicid < first) || (apicid > last))
920 				continue;
921 
922 			this_leaf = this_cpu_ci->info_list + index;
923 
924 			for_each_online_cpu(sibling) {
925 				apicid = cpu_data(sibling).topo.apicid;
926 				if ((apicid < first) || (apicid > last))
927 					continue;
928 				cpumask_set_cpu(sibling,
929 						&this_leaf->shared_cpu_map);
930 			}
931 		}
932 	} else
933 		return 0;
934 
935 	return 1;
936 }
937 
__cache_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)938 static void __cache_cpumap_setup(unsigned int cpu, int index,
939 				 struct _cpuid4_info_regs *base)
940 {
941 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
942 	struct cacheinfo *this_leaf, *sibling_leaf;
943 	unsigned long num_threads_sharing;
944 	int index_msb, i;
945 	struct cpuinfo_x86 *c = &cpu_data(cpu);
946 
947 	if (c->x86_vendor == X86_VENDOR_AMD ||
948 	    c->x86_vendor == X86_VENDOR_HYGON) {
949 		if (__cache_amd_cpumap_setup(cpu, index, base))
950 			return;
951 	}
952 
953 	this_leaf = this_cpu_ci->info_list + index;
954 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
955 
956 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
957 	if (num_threads_sharing == 1)
958 		return;
959 
960 	index_msb = get_count_order(num_threads_sharing);
961 
962 	for_each_online_cpu(i)
963 		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
964 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
965 
966 			if (i == cpu || !sib_cpu_ci->info_list)
967 				continue;/* skip if itself or no cacheinfo */
968 			sibling_leaf = sib_cpu_ci->info_list + index;
969 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
970 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
971 		}
972 }
973 
ci_leaf_init(struct cacheinfo * this_leaf,struct _cpuid4_info_regs * base)974 static void ci_leaf_init(struct cacheinfo *this_leaf,
975 			 struct _cpuid4_info_regs *base)
976 {
977 	this_leaf->id = base->id;
978 	this_leaf->attributes = CACHE_ID;
979 	this_leaf->level = base->eax.split.level;
980 	this_leaf->type = cache_type_map[base->eax.split.type];
981 	this_leaf->coherency_line_size =
982 				base->ebx.split.coherency_line_size + 1;
983 	this_leaf->ways_of_associativity =
984 				base->ebx.split.ways_of_associativity + 1;
985 	this_leaf->size = base->size;
986 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
987 	this_leaf->physical_line_partition =
988 				base->ebx.split.physical_line_partition + 1;
989 	this_leaf->priv = base->nb;
990 }
991 
init_cache_level(unsigned int cpu)992 int init_cache_level(unsigned int cpu)
993 {
994 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
995 
996 	if (!num_cache_leaves)
997 		return -ENOENT;
998 	if (!this_cpu_ci)
999 		return -EINVAL;
1000 	this_cpu_ci->num_levels = 3;
1001 	this_cpu_ci->num_leaves = num_cache_leaves;
1002 	return 0;
1003 }
1004 
1005 /*
1006  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1007  * ECX as cache index. Then right shift apicid by the number's order to get
1008  * cache id for this cache node.
1009  */
get_cache_id(int cpu,struct _cpuid4_info_regs * id4_regs)1010 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1011 {
1012 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1013 	unsigned long num_threads_sharing;
1014 	int index_msb;
1015 
1016 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1017 	index_msb = get_count_order(num_threads_sharing);
1018 	id4_regs->id = c->topo.apicid >> index_msb;
1019 }
1020 
populate_cache_leaves(unsigned int cpu)1021 int populate_cache_leaves(unsigned int cpu)
1022 {
1023 	unsigned int idx, ret;
1024 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1025 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1026 	struct _cpuid4_info_regs id4_regs = {};
1027 
1028 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1029 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1030 		if (ret)
1031 			return ret;
1032 		get_cache_id(cpu, &id4_regs);
1033 		ci_leaf_init(this_leaf++, &id4_regs);
1034 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1035 	}
1036 	this_cpu_ci->cpu_map_populated = true;
1037 
1038 	return 0;
1039 }
1040 
1041 /*
1042  * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1043  *
1044  * Since we are disabling the cache don't allow any interrupts,
1045  * they would run extremely slow and would only increase the pain.
1046  *
1047  * The caller must ensure that local interrupts are disabled and
1048  * are reenabled after cache_enable() has been called.
1049  */
1050 static unsigned long saved_cr4;
1051 static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1052 
cache_disable(void)1053 void cache_disable(void) __acquires(cache_disable_lock)
1054 {
1055 	unsigned long cr0;
1056 
1057 	/*
1058 	 * Note that this is not ideal
1059 	 * since the cache is only flushed/disabled for this CPU while the
1060 	 * MTRRs are changed, but changing this requires more invasive
1061 	 * changes to the way the kernel boots
1062 	 */
1063 
1064 	raw_spin_lock(&cache_disable_lock);
1065 
1066 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1067 	cr0 = read_cr0() | X86_CR0_CD;
1068 	write_cr0(cr0);
1069 
1070 	/*
1071 	 * Cache flushing is the most time-consuming step when programming
1072 	 * the MTRRs. Fortunately, as per the Intel Software Development
1073 	 * Manual, we can skip it if the processor supports cache self-
1074 	 * snooping.
1075 	 */
1076 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1077 		wbinvd();
1078 
1079 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1080 	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1081 		saved_cr4 = __read_cr4();
1082 		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1083 	}
1084 
1085 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1086 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1087 	flush_tlb_local();
1088 
1089 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1090 		mtrr_disable();
1091 
1092 	/* Again, only flush caches if we have to. */
1093 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1094 		wbinvd();
1095 }
1096 
cache_enable(void)1097 void cache_enable(void) __releases(cache_disable_lock)
1098 {
1099 	/* Flush TLBs (no need to flush caches - they are disabled) */
1100 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1101 	flush_tlb_local();
1102 
1103 	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1104 		mtrr_enable();
1105 
1106 	/* Enable caches */
1107 	write_cr0(read_cr0() & ~X86_CR0_CD);
1108 
1109 	/* Restore value of CR4 */
1110 	if (cpu_feature_enabled(X86_FEATURE_PGE))
1111 		__write_cr4(saved_cr4);
1112 
1113 	raw_spin_unlock(&cache_disable_lock);
1114 }
1115 
cache_cpu_init(void)1116 static void cache_cpu_init(void)
1117 {
1118 	unsigned long flags;
1119 
1120 	local_irq_save(flags);
1121 
1122 	if (memory_caching_control & CACHE_MTRR) {
1123 		cache_disable();
1124 		mtrr_generic_set_state();
1125 		cache_enable();
1126 	}
1127 
1128 	if (memory_caching_control & CACHE_PAT)
1129 		pat_cpu_init();
1130 
1131 	local_irq_restore(flags);
1132 }
1133 
/*
 * Flag selecting delayed cache initialisation of the APs. It starts out
 * true and is only read and written through the two accessors below.
 */
static bool cache_aps_delayed_init = true;

/* Set the delayed-init flag to @val. */
void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

/* Return the current value of the delayed-init flag. */
bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}
1145 
/*
 * Callback passed to stop_machine() / stop_machine_from_inactive_cpu().
 *
 * cache_cpu_init() is run on the calling CPU when delayed AP init is still
 * pending or when the calling CPU is not marked online; in every other
 * case nothing is done. @unused is ignored. Always returns 0.
 */
static int cache_rendezvous_handler(void *unused)
{
	/* Online CPU with no delayed init pending: nothing to do. */
	if (!get_cache_aps_delayed_init() && cpu_online(smp_processor_id()))
		return 0;

	cache_cpu_init();

	return 0;
}
1153 
cache_bp_init(void)1154 void __init cache_bp_init(void)
1155 {
1156 	mtrr_bp_init();
1157 	pat_bp_init();
1158 
1159 	if (memory_caching_control)
1160 		cache_cpu_init();
1161 }
1162 
cache_bp_restore(void)1163 void cache_bp_restore(void)
1164 {
1165 	if (memory_caching_control)
1166 		cache_cpu_init();
1167 }
1168 
cache_ap_online(unsigned int cpu)1169 static int cache_ap_online(unsigned int cpu)
1170 {
1171 	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
1172 
1173 	if (!memory_caching_control || get_cache_aps_delayed_init())
1174 		return 0;
1175 
1176 	/*
1177 	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
1178 	 * changed, but this routine will be called in CPU boot time,
1179 	 * holding the lock breaks it.
1180 	 *
1181 	 * This routine is called in two cases:
1182 	 *
1183 	 *   1. very early time of software resume, when there absolutely
1184 	 *      isn't MTRR entry changes;
1185 	 *
1186 	 *   2. CPU hotadd time. We let mtrr_add/del_page hold cpuhotplug
1187 	 *      lock to prevent MTRR entry changes
1188 	 */
1189 	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1190 				       cpu_cacheinfo_mask);
1191 
1192 	return 0;
1193 }
1194 
cache_ap_offline(unsigned int cpu)1195 static int cache_ap_offline(unsigned int cpu)
1196 {
1197 	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
1198 	return 0;
1199 }
1200 
1201 /*
1202  * Delayed cache initialization for all AP's
1203  */
cache_aps_init(void)1204 void cache_aps_init(void)
1205 {
1206 	if (!memory_caching_control || !get_cache_aps_delayed_init())
1207 		return;
1208 
1209 	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1210 	set_cache_aps_delayed_init(false);
1211 }
1212 
cache_ap_register(void)1213 static int __init cache_ap_register(void)
1214 {
1215 	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
1216 	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
1217 
1218 	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1219 				  "x86/cachectrl:starting",
1220 				  cache_ap_online, cache_ap_offline);
1221 	return 0;
1222 }
1223 early_initcall(cache_ap_register);
1224