1 /*
2 * Copyright 2013-2015 Samy Al Bahra.
3 * Copyright 2013 Brendon Scheinman.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <ck_cohort.h>
29 #include <ck_rwcohort.h>
30 #include <ck_spinlock.h>
31 #include <inttypes.h>
32 #include <pthread.h>
33 #include <stdio.h>
34 #include <stdint.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include "../../common.h"
40
/*
 * Yields the larger of x and y (y when they compare equal).
 * Function-like macro: the selected argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#define max(x, y) ((y) < (x) ? (x) : (y))
42
/*
 * Default value for STEPS, used only when the build has not already
 * defined it.
 */
#if !defined(STEPS)
#define STEPS 1000000
#endif
46
/*
 * Rendezvous counter: each worker increments it and spins until it reaches
 * nthr before measuring, and again until it reaches nthr * 2 afterwards.
 */
static unsigned int barrier;
/* Stop flag: main() stores 1 here and the workers leave their loop on seeing it. */
static unsigned int flag CK_CC_CACHELINE;
/* Affinity state passed to aff_iterate_core() by every worker thread. */
static struct affinity affinity;
/* Total number of worker threads; computed in main() from the command line. */
static unsigned int nthr;
51
52 static void
ck_spinlock_fas_lock_with_context(ck_spinlock_fas_t * lock,void * context)53 ck_spinlock_fas_lock_with_context(ck_spinlock_fas_t *lock, void *context)
54 {
55
56 (void)context;
57 ck_spinlock_fas_lock(lock);
58 return;
59 }
60
61 static void
ck_spinlock_fas_unlock_with_context(ck_spinlock_fas_t * lock,void * context)62 ck_spinlock_fas_unlock_with_context(ck_spinlock_fas_t *lock, void *context)
63 {
64
65 (void)context;
66 ck_spinlock_fas_unlock(lock);
67 return;
68 }
69
70 static bool
ck_spinlock_fas_locked_with_context(ck_spinlock_fas_t * lock,void * context)71 ck_spinlock_fas_locked_with_context(ck_spinlock_fas_t *lock, void *context)
72 {
73
74 (void)context;
75 return ck_spinlock_fas_locked(lock);
76 }
77
/*
 * Instantiate a cohort type named "fas_fas". The same three context-taking
 * FAS spinlock adapters (lock, unlock, locked) are supplied for both
 * argument triples; per the ck_cohort interface the first triple serves the
 * global lock and the second the local lock.
 *
 * LOCK_PROTOTYPE is defined outside this file.
 * NOTE(review): presumably it declares the reader-writer cohort lock type
 * exercised through READ_LOCK/READ_UNLOCK below; confirm against the
 * including translation unit.
 */
CK_COHORT_PROTOTYPE(fas_fas,
    ck_spinlock_fas_lock_with_context, ck_spinlock_fas_unlock_with_context, ck_spinlock_fas_locked_with_context,
    ck_spinlock_fas_lock_with_context, ck_spinlock_fas_unlock_with_context, ck_spinlock_fas_locked_with_context)
LOCK_PROTOTYPE(fas_fas)
82
/*
 * Wrapper holding one fas_fas cohort instance.
 * NOTE(review): CK_CC_CACHELINE is presumably an alignment attribute that
 * keeps each record on its own cache line; confirm in the CK headers.
 */
struct cohort_record {
	CK_COHORT_INSTANCE(fas_fas) cohort;
} CK_CC_CACHELINE;
/* Array of n_cohorts records, allocated and initialized in main(). */
static struct cohort_record *cohorts;
87
/* Lock passed to CK_COHORT_INIT as the global lock of every cohort. */
static ck_spinlock_t global_lock = CK_SPINLOCK_INITIALIZER;
/* The lock instance whose read-side acquire/release the workers time. */
static LOCK_INSTANCE(fas_fas) rw_cohort = LOCK_INITIALIZER;
/* Number of cohorts; taken from the first command-line argument in main(). */
static unsigned int n_cohorts;
91
/*
 * Per-thread context record holding a thread identifier.
 * No code visible in this file reads or writes tid.
 */
struct block {
	unsigned int tid;
};
95
96 static void *
thread_rwlock(void * pun)97 thread_rwlock(void *pun)
98 {
99 uint64_t s_b, e_b, a, i;
100 uint64_t *value = pun;
101 CK_COHORT_INSTANCE(fas_fas) *cohort;
102 unsigned int core;
103
104 if (aff_iterate_core(&affinity, &core) != 0) {
105 perror("ERROR: Could not affine thread");
106 exit(EXIT_FAILURE);
107 }
108
109 cohort = &((cohorts + (core / (int)(affinity.delta)) % n_cohorts)->cohort);
110
111 ck_pr_inc_uint(&barrier);
112 while (ck_pr_load_uint(&barrier) != nthr)
113 ck_pr_stall();
114
115 for (i = 1, a = 0;; i++) {
116 s_b = rdtsc();
117 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
118 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
119 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
120 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
121 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
122 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
123 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
124 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
125 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
126 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
127 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
128 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
129 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
130 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
131 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
132 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
133 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
134 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
135 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
136 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
137 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
138 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
139 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
140 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
141 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
142 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
143 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
144 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
145 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
146 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
147 READ_LOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
148 READ_UNLOCK(fas_fas, &rw_cohort, cohort, NULL, NULL);
149 e_b = rdtsc();
150
151 a += (e_b - s_b) >> 4;
152
153 if (ck_pr_load_uint(&flag) == 1)
154 break;
155 }
156
157 ck_pr_inc_uint(&barrier);
158 while (ck_pr_load_uint(&barrier) != nthr * 2)
159 ck_pr_stall();
160
161 *value = (a / i);
162 return NULL;
163 }
164
165 int
main(int argc,char * argv[])166 main(int argc, char *argv[])
167 {
168 unsigned int i;
169 pthread_t *threads;
170 uint64_t *latency;
171 struct block *context;
172 ck_spinlock_fas_t *local_lock;
173
174 if (argc != 4) {
175 ck_error("Usage: throughput <number of cohorts> <threads per cohort> <affinity delta>\n");
176 }
177
178 n_cohorts = atoi(argv[1]);
179 if (n_cohorts <= 0) {
180 ck_error("ERROR: Number of cohorts must be greater than 0\n");
181 }
182
183 nthr = n_cohorts * atoi(argv[2]);
184 if (nthr <= 0) {
185 ck_error("ERROR: Number of threads must be greater than 0\n");
186 }
187
188 threads = malloc(sizeof(pthread_t) * nthr);
189 if (threads == NULL) {
190 ck_error("ERROR: Could not allocate thread structures\n");
191 }
192
193 cohorts = malloc(sizeof(struct cohort_record) * n_cohorts);
194 if (cohorts == NULL) {
195 ck_error("ERROR: Could not allocate cohort structures\n");
196 }
197
198 context = malloc(sizeof(struct block) * nthr);
199 if (context == NULL) {
200 ck_error("ERROR: Could not allocate thread contexts\n");
201 }
202
203 affinity.delta = atoi(argv[3]);
204 affinity.request = 0;
205
206 latency = malloc(sizeof(*latency) * nthr);
207 if (latency == NULL) {
208 ck_error("ERROR: Could not create latency buffer\n");
209 }
210 memset(latency, 0, sizeof(*latency) * nthr);
211
212 fprintf(stderr, "Creating cohorts...");
213 for (i = 0 ; i < n_cohorts ; i++) {
214 local_lock = malloc(max(CK_MD_CACHELINE, sizeof(ck_spinlock_fas_t)));
215 if (local_lock == NULL) {
216 ck_error("ERROR: Could not allocate local lock\n");
217 }
218 CK_COHORT_INIT(fas_fas, &((cohorts + i)->cohort), &global_lock, local_lock,
219 CK_COHORT_DEFAULT_LOCAL_PASS_LIMIT);
220 local_lock = NULL;
221 }
222 fprintf(stderr, "done\n");
223
224 fprintf(stderr, "Creating threads (rwlock)...");
225 for (i = 0; i < nthr; i++) {
226 if (pthread_create(&threads[i], NULL, thread_rwlock, latency + i) != 0) {
227 ck_error("ERROR: Could not create thread %d\n", i);
228 }
229 }
230 fprintf(stderr, "done\n");
231
232 common_sleep(10);
233 ck_pr_store_uint(&flag, 1);
234
235 fprintf(stderr, "Waiting for threads to finish acquisition regression...");
236 for (i = 0; i < nthr; i++)
237 pthread_join(threads[i], NULL);
238 fprintf(stderr, "done\n\n");
239
240 for (i = 1; i <= nthr; i++)
241 printf("%10u %20" PRIu64 "\n", i, latency[i - 1]);
242
243 return (0);
244 }
245
246