1 /*
2  * Copyright 2013-2015 Samy Al Bahra.
3  * Copyright 2013 Brendon Scheinman.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <ck_cohort.h>
29 #include <ck_rwcohort.h>
30 #include <ck_spinlock.h>
31 #include <inttypes.h>
32 #include <pthread.h>
33 #include <stdio.h>
34 #include <stdint.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include "../../common.h"
40 
/*
 * Yields the larger of two values. Being a macro, each argument may be
 * evaluated more than once, so callers must pass side-effect-free expressions.
 */
#define max(x, y) ((((y) < (x))) ? (x) : (y))
42 
/*
 * Default step count; may be overridden by defining STEPS before this point
 * (e.g. on the compiler command line).
 * NOTE(review): STEPS is not referenced anywhere in the visible code.
 */
#ifndef STEPS
#define STEPS 1000000
#endif
46 
/*
 * Rendezvous counter: every worker increments it and spins until it reaches
 * nthr (before measuring) and again until it reaches nthr * 2 (after).
 */
static unsigned int barrier;
/*
 * Stop signal: main() stores 1 here once the measurement interval is over and
 * workers leave their loop when they observe it.
 * NOTE(review): CK_CC_CACHELINE presumably places it on its own cache line.
 */
static unsigned int flag CK_CC_CACHELINE;
/*
 * Affinity state handed to aff_iterate_core(); main() sets delta from argv[3]
 * and request to 0.
 */
static struct affinity affinity;
/* Total number of worker threads: cohort count times threads per cohort. */
static unsigned int nthr;
51 
52 static void
ck_spinlock_fas_lock_with_context(ck_spinlock_fas_t * lock,void * context)53 ck_spinlock_fas_lock_with_context(ck_spinlock_fas_t *lock, void *context)
54 {
55 
56 	(void)context;
57 	ck_spinlock_fas_lock(lock);
58 	return;
59 }
60 
61 static void
ck_spinlock_fas_unlock_with_context(ck_spinlock_fas_t * lock,void * context)62 ck_spinlock_fas_unlock_with_context(ck_spinlock_fas_t *lock, void *context)
63 {
64 
65 	(void)context;
66 	ck_spinlock_fas_unlock(lock);
67 	return;
68 }
69 
70 static bool
ck_spinlock_fas_locked_with_context(ck_spinlock_fas_t * lock,void * context)71 ck_spinlock_fas_locked_with_context(ck_spinlock_fas_t *lock, void *context)
72 {
73 
74 	(void)context;
75 	return ck_spinlock_fas_locked(lock);
76 }
77 
/*
 * Instantiate a cohort type named fas_fas. The same three FAS spinlock
 * adapters (lock, unlock, locked) are supplied for both triples of callbacks.
 * NOTE(review): per the ck_cohort documentation the first triple is for the
 * global lock and the second for the local lock -- confirm.
 *
 * LOCK_PROTOTYPE is not defined in the visible code; it is presumably provided
 * by whichever file includes this one and selects the rwcohort variant.
 */
CK_COHORT_PROTOTYPE(fas_fas,
    ck_spinlock_fas_lock_with_context, ck_spinlock_fas_unlock_with_context, ck_spinlock_fas_locked_with_context,
    ck_spinlock_fas_lock_with_context, ck_spinlock_fas_unlock_with_context, ck_spinlock_fas_locked_with_context)
LOCK_PROTOTYPE(fas_fas)
82 
/*
 * One fas_fas cohort instance per record.
 * NOTE(review): CK_CC_CACHELINE presumably requests cache-line alignment so
 * neighbouring cohorts do not share a line -- confirm.
 */
struct cohort_record {
	CK_COHORT_INSTANCE(fas_fas) cohort;
} CK_CC_CACHELINE;
/* Array of n_cohorts records, allocated and initialised in main(). */
static struct cohort_record *cohorts;
87 
/* Single lock passed to every CK_COHORT_INIT call as the global lock. */
static ck_spinlock_t global_lock = CK_SPINLOCK_INITIALIZER;
/* The reader-writer cohort lock that the worker threads acquire and release. */
static LOCK_INSTANCE(fas_fas) rw_cohort = LOCK_INITIALIZER;
/* Number of cohorts, taken from argv[1] in main(). */
static unsigned int n_cohorts;
91 
/*
 * Per-thread context record holding a thread identifier.
 * NOTE(review): no field of this structure is read or written in the visible
 * code.
 */
struct block {
	unsigned int tid;
};
95 
96 static void *
thread_rwlock(void * pun)97 thread_rwlock(void *pun)
98 {
99 	uint64_t s_b, e_b, a, i;
100 	uint64_t *value = pun;
101 	CK_COHORT_INSTANCE(fas_fas) *cohort;
102 	unsigned int core;
103 
104 	if (aff_iterate_core(&affinity, &core) != 0) {
105 		perror("ERROR: Could not affine thread");
106 		exit(EXIT_FAILURE);
107 	}
108 
109 	cohort = &((cohorts + (core / (int)(affinity.delta)) % n_cohorts)->cohort);
110 
111 	ck_pr_inc_uint(&barrier);
112 	while (ck_pr_load_uint(&barrier) != nthr)
113 		ck_pr_stall();
114 
115 	for (i = 1, a = 0;; i++) {
116 		s_b = rdtsc();
117 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
118 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
119 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
120 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
121 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
122 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
123 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
124 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
125 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
126 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
127 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
128 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
129 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
130 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
131 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
132 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
133 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
134 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
135 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
136 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
137 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
138 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
139 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
140 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
141 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
142 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
143 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
144 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
145 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
146 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
147 		READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
148 		READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
149 		e_b = rdtsc();
150 
151 		a += (e_b - s_b) >> 4;
152 
153 		if (ck_pr_load_uint(&flag) == 1)
154 			break;
155 	}
156 
157 	ck_pr_inc_uint(&barrier);
158 	while (ck_pr_load_uint(&barrier) != nthr * 2)
159 		ck_pr_stall();
160 
161 	*value = (a / i);
162 	return NULL;
163 }
164 
165 int
main(int argc,char * argv[])166 main(int argc, char *argv[])
167 {
168 	unsigned int i;
169 	pthread_t *threads;
170 	uint64_t *latency;
171 	struct block *context;
172 	ck_spinlock_fas_t *local_lock;
173 
174 	if (argc != 4) {
175 		ck_error("Usage: throughput <number of cohorts> <threads per cohort> <affinity delta>\n");
176 	}
177 
178 	n_cohorts = atoi(argv[1]);
179 	if (n_cohorts <= 0) {
180 		ck_error("ERROR: Number of cohorts must be greater than 0\n");
181 	}
182 
183 	nthr = n_cohorts * atoi(argv[2]);
184 	if (nthr <= 0) {
185 		ck_error("ERROR: Number of threads must be greater than 0\n");
186 	}
187 
188 	threads = malloc(sizeof(pthread_t) * nthr);
189 	if (threads == NULL) {
190 		ck_error("ERROR: Could not allocate thread structures\n");
191 	}
192 
193 	cohorts = malloc(sizeof(struct cohort_record) * n_cohorts);
194 	if (cohorts == NULL) {
195 		ck_error("ERROR: Could not allocate cohort structures\n");
196 	}
197 
198 	context = malloc(sizeof(struct block) * nthr);
199 	if (context == NULL) {
200 		ck_error("ERROR: Could not allocate thread contexts\n");
201 	}
202 
203 	affinity.delta = atoi(argv[3]);
204 	affinity.request = 0;
205 
206 	latency = malloc(sizeof(*latency) * nthr);
207 	if (latency == NULL) {
208 		ck_error("ERROR: Could not create latency buffer\n");
209 	}
210 	memset(latency, 0, sizeof(*latency) * nthr);
211 
212 	fprintf(stderr, "Creating cohorts...");
213 	for (i = 0 ; i < n_cohorts ; i++) {
214 		local_lock = malloc(max(CK_MD_CACHELINE, sizeof(ck_spinlock_fas_t)));
215 		if (local_lock == NULL) {
216 			ck_error("ERROR: Could not allocate local lock\n");
217 		}
218 		CK_COHORT_INIT(fas_fas, &((cohorts + i)->cohort), &global_lock, local_lock,
219 		    CK_COHORT_DEFAULT_LOCAL_PASS_LIMIT);
220 		local_lock = NULL;
221 	}
222 	fprintf(stderr, "done\n");
223 
224 	fprintf(stderr, "Creating threads (rwlock)...");
225 	for (i = 0; i < nthr; i++) {
226 		if (pthread_create(&threads[i], NULL, thread_rwlock, latency + i) != 0) {
227 			ck_error("ERROR: Could not create thread %d\n", i);
228 		}
229 	}
230 	fprintf(stderr, "done\n");
231 
232 	common_sleep(10);
233 	ck_pr_store_uint(&flag, 1);
234 
235 	fprintf(stderr, "Waiting for threads to finish acquisition regression...");
236 	for (i = 0; i < nthr; i++)
237 		pthread_join(threads[i], NULL);
238 	fprintf(stderr, "done\n\n");
239 
240 	for (i = 1; i <= nthr; i++)
241 		printf("%10u %20" PRIu64 "\n", i, latency[i - 1]);
242 
243 	return (0);
244 }
245 
246