1 /*
2  * Copyright 2011-2015 Samy Al Bahra.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <ck_rwlock.h>
28 #include <inttypes.h>
29 #include <pthread.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 
35 #include "../../common.h"
36 
37 #ifndef STEPS
38 #define STEPS 1000000
39 #endif
40 
41 static int barrier;
42 static int threads;
43 static unsigned int flag CK_CC_CACHELINE;
44 static struct {
45 	ck_rwlock_t lock;
46 } rw CK_CC_CACHELINE = {
47 	.lock = CK_RWLOCK_INITIALIZER
48 };
49 
50 static struct affinity affinity;
51 
52 #ifdef CK_F_PR_RTM
53 static void *
thread_lock_rtm(void * pun)54 thread_lock_rtm(void *pun)
55 {
56 	uint64_t s_b, e_b, a, i;
57 	uint64_t *value = pun;
58 
59 	if (aff_iterate(&affinity) != 0) {
60 		perror("ERROR: Could not affine thread");
61 		exit(EXIT_FAILURE);
62 	}
63 
64 	ck_pr_inc_int(&barrier);
65 	while (ck_pr_load_int(&barrier) != threads)
66 		ck_pr_stall();
67 
68 	for (i = 1, a = 0;; i++) {
69 		s_b = rdtsc();
70 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
71 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
72 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
73 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
74 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
75 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
76 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
77 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
78 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
79 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
80 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
81 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
82 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
83 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
84 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
85 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
86 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
87 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
88 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
89 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
90 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
91 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
92 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
93 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
94 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
95 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
96 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
97 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
98 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
99 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
100 		CK_ELIDE_LOCK(ck_rwlock_read, &rw.lock);
101 		CK_ELIDE_UNLOCK(ck_rwlock_read, &rw.lock);
102 		e_b = rdtsc();
103 
104 		a += (e_b - s_b) >> 4;
105 
106 		if (ck_pr_load_uint(&flag) == 1)
107 			break;
108 	}
109 
110 	ck_pr_inc_int(&barrier);
111 	while (ck_pr_load_int(&barrier) != threads * 2)
112 		ck_pr_stall();
113 
114 	*value = (a / i);
115 	return NULL;
116 }
117 #endif /* CK_F_PR_RTM */
118 
119 static void *
thread_lock(void * pun)120 thread_lock(void *pun)
121 {
122 	uint64_t s_b, e_b, a, i;
123 	uint64_t *value = pun;
124 
125 	if (aff_iterate(&affinity) != 0) {
126 		perror("ERROR: Could not affine thread");
127 		exit(EXIT_FAILURE);
128 	}
129 
130 	ck_pr_inc_int(&barrier);
131 	while (ck_pr_load_int(&barrier) != threads)
132 		ck_pr_stall();
133 
134 	for (i = 1, a = 0;; i++) {
135 		s_b = rdtsc();
136 		ck_rwlock_read_lock(&rw.lock);
137 		ck_rwlock_read_unlock(&rw.lock);
138 		ck_rwlock_read_lock(&rw.lock);
139 		ck_rwlock_read_unlock(&rw.lock);
140 		ck_rwlock_read_lock(&rw.lock);
141 		ck_rwlock_read_unlock(&rw.lock);
142 		ck_rwlock_read_lock(&rw.lock);
143 		ck_rwlock_read_unlock(&rw.lock);
144 		ck_rwlock_read_lock(&rw.lock);
145 		ck_rwlock_read_unlock(&rw.lock);
146 		ck_rwlock_read_lock(&rw.lock);
147 		ck_rwlock_read_unlock(&rw.lock);
148 		ck_rwlock_read_lock(&rw.lock);
149 		ck_rwlock_read_unlock(&rw.lock);
150 		ck_rwlock_read_lock(&rw.lock);
151 		ck_rwlock_read_unlock(&rw.lock);
152 		ck_rwlock_read_lock(&rw.lock);
153 		ck_rwlock_read_unlock(&rw.lock);
154 		ck_rwlock_read_lock(&rw.lock);
155 		ck_rwlock_read_unlock(&rw.lock);
156 		ck_rwlock_read_lock(&rw.lock);
157 		ck_rwlock_read_unlock(&rw.lock);
158 		ck_rwlock_read_lock(&rw.lock);
159 		ck_rwlock_read_unlock(&rw.lock);
160 		ck_rwlock_read_lock(&rw.lock);
161 		ck_rwlock_read_unlock(&rw.lock);
162 		ck_rwlock_read_lock(&rw.lock);
163 		ck_rwlock_read_unlock(&rw.lock);
164 		ck_rwlock_read_lock(&rw.lock);
165 		ck_rwlock_read_unlock(&rw.lock);
166 		ck_rwlock_read_lock(&rw.lock);
167 		ck_rwlock_read_unlock(&rw.lock);
168 		e_b = rdtsc();
169 
170 		a += (e_b - s_b) >> 4;
171 
172 		if (ck_pr_load_uint(&flag) == 1)
173 			break;
174 	}
175 
176 	ck_pr_inc_int(&barrier);
177 	while (ck_pr_load_int(&barrier) != threads * 2)
178 		ck_pr_stall();
179 
180 	*value = (a / i);
181 	return NULL;
182 }
183 
184 static void
rwlock_test(pthread_t * p,int d,uint64_t * latency,void * (* f)(void *),const char * label)185 rwlock_test(pthread_t *p, int d, uint64_t *latency, void *(*f)(void *), const char *label)
186 {
187 	int t;
188 
189 	ck_pr_store_int(&barrier, 0);
190 	ck_pr_store_uint(&flag, 0);
191 
192 	affinity.delta = d;
193 	affinity.request = 0;
194 
195 	fprintf(stderr, "Creating threads (%s)...", label);
196 	for (t = 0; t < threads; t++) {
197 		if (pthread_create(&p[t], NULL, f, latency + t) != 0) {
198 			ck_error("ERROR: Could not create thread %d\n", t);
199 		}
200 	}
201 	fprintf(stderr, "done\n");
202 
203 	common_sleep(10);
204 	ck_pr_store_uint(&flag, 1);
205 
206 	fprintf(stderr, "Waiting for threads to finish acquisition regression...");
207 	for (t = 0; t < threads; t++)
208 		pthread_join(p[t], NULL);
209 	fprintf(stderr, "done\n\n");
210 
211 	for (t = 1; t <= threads; t++)
212 		printf("%10u %20" PRIu64 "\n", t, latency[t - 1]);
213 
214 	fprintf(stderr, "\n");
215 	return;
216 }
217 
218 
219 int
main(int argc,char * argv[])220 main(int argc, char *argv[])
221 {
222 	int d;
223 	pthread_t *p;
224 	uint64_t *latency;
225 
226 	if (argc != 3) {
227 		ck_error("Usage: throughput <delta> <threads>\n");
228 	}
229 
230 	threads = atoi(argv[2]);
231 	if (threads <= 0) {
232 		ck_error("ERROR: Threads must be a value > 0.\n");
233 	}
234 
235 	p = malloc(sizeof(pthread_t) * threads);
236 	if (p == NULL) {
237 		ck_error("ERROR: Failed to initialize thread.\n");
238 	}
239 
240 	latency = malloc(sizeof(uint64_t) * threads);
241 	if (latency == NULL) {
242 		ck_error("ERROR: Failed to create latency buffer.\n");
243 	}
244 
245 	d = atoi(argv[1]);
246 	rwlock_test(p, d, latency, thread_lock, "rwlock");
247 
248 #ifdef CK_F_PR_RTM
249 	rwlock_test(p, d, latency, thread_lock_rtm, "rwlock, rtm");
250 #endif /* CK_F_PR_RTM */
251 
252 	return 0;
253 }
254 
255