// SPDX-License-Identifier: GPL-2.0
/*
 * fill_buf benchmark
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>

#include "resctrl.h"

#define CL_SIZE			(64)
#define PAGE_SIZE		(4 * 1024)
#define MB			(1024 * 1024)

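/*
 * Store fence: used after the clflush loop in mem_flush() so the flushes
 * are ordered before any later accesses to the buffer.
 */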
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#endif
}

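/* Flush the cache line containing @p from all levels of the cache hierarchy. */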
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}

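/* Flush @buf out of the caches, one cache line at a time, then fence. */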
void mem_flush(unsigned char *buf, size_t buf_size)
{
	unsigned char *cp = buf;
	size_t i = 0;

	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

	for (i = 0; i < buf_size; i++)
		cl_flush(&cp[i * CL_SIZE]);

	sb();
}


/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indices of the buffer.
 *
 * Some primes work better than others on some architectures (from the
 * MBA/MBM result stability point of view).
 */
#define FILL_IDX_MULT	23
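
/*
 * Illustrative sketch (not part of the benchmark): a step that is coprime
 * with the number of indices visits every index exactly once before the
 * sequence repeats. For a hypothetical size of 8 half-cachelines the walk
 * is 0, 7, 6, 5, 4, 3, 2, 1, then back to 0, because 23 mod 8 == 7 and
 * gcd(23, 8) == 1. The hypothetical helper below, compiled standalone,
 * verifies full coverage for a given size; it degenerates (returns 0)
 * only when size is a multiple of FILL_IDX_MULT.
 */
#if 0
static int covers_all_indices(unsigned int size)
{
	unsigned int steps = 0, idx = 0;

	if (!size)
		return 0;

	/* Walk the additive cycle starting from 0 until it returns to 0. */
	do {
		idx = (idx + FILL_IDX_MULT) % size;
		steps++;
	} while (idx != 0);

	/* Cycle length is size / gcd(FILL_IDX_MULT, size). */
	return steps == size;
}
#endif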

static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int size = buf_size / (CL_SIZE / 2);
	unsigned int i, idx = 0;
	unsigned char sum = 0;

	/*
	 * Read the buffer in an order that is unexpected by HW prefetching
	 * optimizations to prevent them from interfering with the caching
	 * pattern.
	 *
	 * The read order is (in terms of halves of cachelines):
	 *	i * FILL_IDX_MULT % size
	 * The formula is open-coded below to avoid the modulo inside the
	 * loop, as that improves MBA/MBM result stability on some
	 * architectures.
	 */
	for (i = 0; i < size; i++) {
		sum += buf[idx * (CL_SIZE / 2)];

		idx += FILL_IDX_MULT;
		while (idx >= size)	/* open-coded idx %= size */
			idx -= size;
	}

	return sum;
}

static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
	unsigned char *end_ptr = buf + buf_size;
	unsigned char *p;

	p = buf;
	while (p < end_ptr) {
		*p = '1';
		p += (CL_SIZE / 2);
	}
}

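/* Read the whole buffer repeatedly, or just once when @once is true. */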
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int ret = 0;

	while (1) {
		ret = fill_one_span_read(buf, buf_size);
		if (once)
			break;
	}

	/* Consume read result so that reading memory is not optimized out. */
	*value_sink = ret;
}

static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
	while (1) {
		fill_one_span_write(buf, buf_size);
		if (once)
			break;
	}
}

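/*
 * Allocate a page-aligned buffer of @buf_size bytes, write one word per
 * cache line so every page is faulted in, and optionally flush the buffer
 * out of the caches.
 */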
unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	size_t s64;
	int ret;

	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	if (ret)	/* posix_memalign() returns a positive error number, not -1 */
		return NULL;

	/* Initialize the buffer */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}

	/* Flush the memory before use to avoid the "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}

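/*
 * Allocate a buffer of @buf_size bytes and exercise it: @op == 0 selects
 * the read pattern, anything else the write pattern. Returns 0 on success,
 * -1 if the buffer could not be allocated.
 */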
int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
	unsigned char *buf;

	buf = alloc_buffer(buf_size, memflush);
	if (!buf)
		return -1;

	if (op == 0)
		fill_cache_read(buf, buf_size, once);
	else
		fill_cache_write(buf, buf_size, once);
	free(buf);

	return 0;
}
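
/*
 * Minimal standalone usage sketch (hypothetical; the real selftests invoke
 * run_fill_buf() from their own harness): touch a 10 MB buffer once with
 * the read pattern, flushing it from the caches first.
 */
#if 0
int main(void)
{
	/* buf_size, memflush, op (0 = read), once */
	return run_fill_buf(10 * MB, 1, 0, true) ? 1 : 0;
}
#endif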