/* { dg-do run } */
/* { dg-require-effective-target sync_long_long_runtime } */
/* { dg-require-effective-target pthread_h } */
/* { dg-require-effective-target pthread } */
/* { dg-options "-pthread -std=gnu99" } */
/* { dg-additional-options "-march=pentium" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
7 
/* Test of long long atomic ops performed in parallel in 3 pthreads.
   david.gilbert@linaro.org */
10 
11 #include <pthread.h>
12 #include <unistd.h>
13 #ifdef _WIN32
14 #include <windows.h>
15 #endif
16 
/* Uncomment to get a diagnostic on stderr before each abort.  */
/*#define DEBUGIT 1 */

#ifdef DEBUGIT
#include <stdio.h>

/* Print the printf-style message X with its arguments to stderr, flush
   stderr, then abort.  At least one argument must follow X.  The body is
   wrapped in do/while (0) so that an invocation followed by ';' is exactly
   one statement, which keeps it usable as the body of an if that has an
   else.  */
#define DOABORT(x,...) \
  do \
    { \
      fprintf (stderr, x, __VA_ARGS__); \
      fflush (stderr); \
      abort (); \
    } \
  while (0)

#else

/* Without DEBUGIT the message and its arguments are discarded and the
   failure is reported by abort alone.  */
#define DOABORT(x,...) do { abort (); } while (0)

#endif
31 
/* Passed to each thread to describe which bits of the shared workspace it
   is going to work on.  */
struct threadwork {
  unsigned long long count; /* Incremented each time the worker loops.  */
  unsigned int thread;      /* Thread ID.  */
  unsigned int addlsb;      /* Lowest bit of the 8 bit arithmetic field.  */
  unsigned int logic1lsb;   /* Lowest bit of the 5 bit logic field.  */
  unsigned int logic2lsb;   /* Lowest bit of the 8 bit logic field.  */
};
40 
/* The shared word where all the atomic work is done.  */
static volatile long long workspace;

/* A shared word that tells the workers to quit; they stop looping once
   it holds 1.  */
static long long doquit;

/* Declared here rather than through <stdlib.h>.  */
extern void abort (void);
48 
49 /* Note this test doesn't test the return values much.  */
50 void*
worker(void * data)51 worker (void* data)
52 {
53   struct threadwork *tw = (struct threadwork*)data;
54   long long add1bit = 1ll << tw->addlsb;
55   long long logic1bit = 1ll << tw->logic1lsb;
56   long long logic2bit = 1ll << tw->logic2lsb;
57 
58   /* Clear the bits we use.  */
59   __sync_and_and_fetch (&workspace, ~(0xffll * add1bit));
60   __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit));
61   __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit));
62 
63   do
64     {
65       long long tmp1, tmp2, tmp3;
66       /* OK, lets try and do some stuff to the workspace - by the end
67          of the main loop our area should be the same as it is now - i.e. 0.  */
68 
69       /* Push the arithmetic section up to 128 - one of the threads will
70          case this to carry across the 32bit boundary.  */
71       for (tmp2 = 0; tmp2 < 64; tmp2++)
72 	{
73 	  /* Add 2 using the two different adds.  */
74 	  tmp1 = __sync_add_and_fetch (&workspace, add1bit);
75 	  tmp3 = __sync_fetch_and_add (&workspace, add1bit);
76 
77 	  /* The value should be the intermediate add value in both cases.  */
78 	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
79 	    DOABORT ("Mismatch of add intermediates on thread %d "
80 			"workspace=0x%llx tmp1=0x%llx "
81 			"tmp2=0x%llx tmp3=0x%llx\n",
82 			 tw->thread, workspace, tmp1, tmp2, tmp3);
83 	}
84 
85       /* Set the logic bits.  */
86       tmp2=__sync_or_and_fetch (&workspace,
87 			  0x1fll * logic1bit | 0xffll * logic2bit);
88 
89       /* Check the logic bits are set and the arithmetic value is correct.  */
90       if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit
91 			| 0xffll * add1bit))
92 	  != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit))
93 	DOABORT ("Midloop check failed on thread %d "
94 			"workspace=0x%llx tmp2=0x%llx "
95 			"masktmp2=0x%llx expected=0x%llx\n",
96 		tw->thread, workspace, tmp2,
97 		tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit |
98 			 0xffll * add1bit),
99 		(0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit));
100 
101       /* Pull the arithmetic set back down to 0 - again this should cause a
102 	 carry across the 32bit boundary in one thread.  */
103 
104       for (tmp2 = 0; tmp2 < 64; tmp2++)
105 	{
106 	  /* Subtract 2 using the two different subs.  */
107 	  tmp1=__sync_sub_and_fetch (&workspace, add1bit);
108 	  tmp3=__sync_fetch_and_sub (&workspace, add1bit);
109 
110 	  /* The value should be the intermediate sub value in both cases.  */
111 	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
112 	    DOABORT ("Mismatch of sub intermediates on thread %d "
113 			"workspace=0x%llx tmp1=0x%llx "
114 			"tmp2=0x%llx tmp3=0x%llx\n",
115 			tw->thread, workspace, tmp1, tmp2, tmp3);
116 	}
117 
118 
119       /* Clear the logic bits.  */
120       __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit);
121       tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit));
122 
123       /* The logic bits and the arithmetic bits should be zero again.  */
124       if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit))
125 	DOABORT ("End of worker loop; bits none 0 on thread %d "
126 			"workspace=0x%llx tmp3=0x%llx "
127 			"mask=0x%llx maskedtmp3=0x%llx\n",
128 		tw->thread, workspace, tmp3, (0x1fll * logic1bit |
129 			0xffll * logic2bit | 0xffll * add1bit),
130 		tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit));
131 
132       __sync_add_and_fetch (&tw->count, 1);
133     }
134   while (!__sync_bool_compare_and_swap (&doquit, 1, 1));
135 
136   pthread_exit (0);
137 }
138 
139 int
main()140 main ()
141 {
142   /* We have 3 threads doing three sets of operations, an 8 bit
143      arithmetic field, a 5 bit logic field and an 8 bit logic
144      field (just to pack them all in).
145 
146   6      5       4       4       3       2       1
147   3      6       8       0       2       4       6       8       0
148   |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,...
149   - T0   --  T1  -- T2   --T2 --  T0  -*- T2-- T1-- T1   -***- T0-
150    logic2  logic2  arith   log2  arith  log1 log1  arith     log1
151 
152   */
153   unsigned int t;
154   long long tmp;
155   int err;
156 
157   struct threadwork tw[3]={
158     { 0ll, 0, 27, 0, 56 },
159     { 0ll, 1,  8,16, 48 },
160     { 0ll, 2, 40,21, 35 }
161   };
162 
163   pthread_t threads[3];
164 
165   __sync_lock_release (&doquit);
166 
167   /* Get the work space into a known value - All 1's.  */
168   __sync_lock_release (&workspace); /* Now all 0.  */
169   tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll);
170   if (tmp!=0)
171     DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx "
172 		"tmp=0x%llx\n", workspace,tmp);
173 
174   for (t = 0; t < 3; t++)
175   {
176     err=pthread_create (&threads[t], NULL , worker, &tw[t]);
177     if (err) DOABORT ("pthread_create failed on thread %d with error %d\n",
178 	t, err);
179   };
180 
181 #ifdef _WIN32
182   Sleep (5000);
183 #else
184   sleep (5);
185 #endif
186 
187   /* Stop please.  */
188   __sync_lock_test_and_set (&doquit, 1ll);
189 
190   for (t = 0; t < 3; t++)
191     {
192       err=pthread_join (threads[t], NULL);
193       if (err)
194 	DOABORT ("pthread_join failed on thread %d with error %d\n", t, err);
195     };
196 
197   __sync_synchronize ();
198 
199   /* OK, so all the workers have finished -
200      the workers should have zero'd their workspace, the unused areas
201      should still be 1.  */
202   if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0))
203     DOABORT ("End of run workspace mismatch, got %llx\n", workspace);
204 
205   /* All the workers should have done some work.  */
206   for (t = 0; t < 3; t++)
207     {
208       if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t);
209     };
210 
211   return 0;
212 }
213 
214