/***************************************************************************
                     where_inc.cpp  -  include for where.cpp
                             -------------------
    begin                : Apr 7 2018
    copyright            : (C) 2002 by Marc Schellens, G. Duvert
    email                : m_schellens@users.sf.net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
template<>
void Data_<Sp>::Where(DLong64* &ret, SizeT &passed_count, bool comp, DLong64* &comp_ret) {
  SizeT nEl=this->N_Elements();
  //code is optimized for 1 thread (no threading) and for the presence or absence of the complement.
  int nchunk=(nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))?CpuTPOOL_NTHREADS:1;
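  // nchunk is the number of OpenMP threads used below: 1 selects the serial
  // fast path, otherwise CpuTPOOL_NTHREADS threads each compact one chunk.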
  if (comp) {
    if (nchunk==1) {
      DLong64* yes = (DLong64*)MALLOC(nEl*sizeof(DLong64));
      DLong64* no = (DLong64*)MALLOC(nEl*sizeof(DLong64));
      // computational magic of 0 and 1 to keep only 'good' indexes.
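      // Branchless compaction: each iteration writes index i into the next
      // slot of both buffers, but only the matching counter advances, so an
      // unclaimed slot is simply overwritten later. E.g. for values {0,1,1,0}:
      //   i=0: tmp=0 -> yes[0]=0 (unclaimed), no[0]=0, countno=1
      //   i=1: tmp=1 -> yes[0]=1, countyes=1, no[1]=1 (unclaimed)
      //   i=2: tmp=1 -> yes[1]=2, countyes=2, no[1]=2 (unclaimed)
      //   i=3: tmp=0 -> yes[2]=3 (unclaimed), no[1]=3, countno=2
      // leaving yes={1,2} and no={0,3}, with no branch inside the loop.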
      SizeT countyes=0;
      SizeT countno=0;
      for (SizeT i=0; i<nEl; ++i) {
        bool tmp=(*this)[i];
        yes[countyes]=i;
        no[countno]=i;
        countyes+=tmp;
        countno+=(!tmp);
      }
      passed_count=countyes;
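      // shrink the worst-case buffers to the exact number of indices found,
      // or free them entirely when a set turned out empty.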
      if (countyes) ret=(DLong64*)REALLOC(yes,countyes * sizeof (DLong64)); else {FREE(yes); ret=NULL;}
      if (countno) comp_ret=(DLong64*)REALLOC(no,countno * sizeof (DLong64)); else {FREE(no); comp_ret=NULL;}
      return;
    } else {
      SizeT chunksize = nEl / nchunk;
      DLong64* partyes[nchunk];
      DLong64* partno[nchunk];
      SizeT partialCountYes[nchunk];
      SizeT partialCountNo[nchunk];
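      // Each thread compacts its own chunk into private buffers; the partial
      // counts are then summed and the buffers concatenated in thread order,
      // so the returned indices stay sorted.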
#pragma omp parallel num_threads(nchunk) //shared(partialCount,part) //if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))
      {
        int thread_id = currentThreadNumber();
        SizeT start_index, stop_index;
        start_index = thread_id * chunksize;
        if (thread_id != nchunk-1) { //robust whether threading is actually in use or not.
          stop_index = start_index + chunksize;
        } else {
          stop_index = nEl;
        }
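        // the last thread also absorbs the nEl % nchunk remainder.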

        SizeT space=(stop_index-start_index)*sizeof(DLong64); //chunk size in bytes
        SizeT i=0;
        //compute a [0] or [1] series
        // allocate this thread's subset of the final result (space is already in bytes)
        partyes[thread_id] = (DLong64*)MALLOC(space);
        partno[thread_id] = (DLong64*)MALLOC(space);
        // computational magic of 0 and 1 to keep only 'good' indexes.
        SizeT local_count_yes=0;
        SizeT local_count_no=0;
        for (i=start_index; i<stop_index; ++i) {
          bool tmp=(*this)[i];
          partyes[thread_id][local_count_yes]=i;
          partno[thread_id][local_count_no]=i;
          local_count_yes+=tmp;
          local_count_no+=(!tmp);
        }
        partialCountYes[thread_id]=local_count_yes;
        partialCountNo[thread_id]=local_count_no;
      } //end parallel section
      //total count is sum of partial counts:
      SizeT countyes=0;
      SizeT countno=0;
      for (int iloop=0; iloop<nchunk; ++iloop) countyes+=partialCountYes[iloop];
      for (int iloop=0; iloop<nchunk; ++iloop) countno+=partialCountNo[iloop];
      // allocate final result
      if (countyes>0) {
        ret=(DLong64*)MALLOC(countyes * sizeof (DLong64)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCountYes[iloop]; ++jj) ret[running_index++]=partyes[iloop][jj];
        }
      } else ret=NULL; //mirror the single-thread path
      if (countno>0) {
        comp_ret=(DLong64*)MALLOC(countno * sizeof (DLong64)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCountNo[iloop]; ++jj) comp_ret[running_index++]=partno[iloop][jj];
        }
      } else comp_ret=NULL; //mirror the single-thread path
      passed_count=countyes;
      //free temporary arrays.
      for (int iloop=0; iloop<nchunk; ++iloop) {
        FREE(partyes[iloop]);
        FREE(partno[iloop]);
      }
    }
  } else {
    if (nchunk==1) {
      DLong64* yes = (DLong64*)MALLOC(nEl*sizeof(DLong64));
      // computational magic of 0 and 1 to keep only 'good' indexes.
      SizeT count=0;
      for (SizeT i=0; i<nEl; ++i) {
        bool tmp=(*this)[i];
        yes[count]=i;
        count+=tmp;
      }
      passed_count=count;
      if (count) ret=(DLong64*)REALLOC(yes,count * sizeof (DLong64)); else {FREE(yes); ret=NULL;}
      return;
    } else {
      SizeT chunksize = nEl / nchunk;
      DLong64* part[nchunk];
      SizeT partialCount[nchunk];
#pragma omp parallel num_threads(nchunk) //shared(partialCount,part) //if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))
      {
        int thread_id = currentThreadNumber();
        SizeT start_index, stop_index;
        start_index = thread_id * chunksize;
        if (thread_id != nchunk-1) { //robust whether threading is actually in use or not.
          stop_index = start_index + chunksize;
        } else {
          stop_index = nEl;
        }

        SizeT space=(stop_index-start_index)*sizeof(DLong64); //chunk size in bytes
        SizeT i=0;
        //compute a [0] or [1] series
        // allocate this thread's subset of the final result (space is already in bytes)
        part[thread_id] = (DLong64*)MALLOC(space);
        // computational magic of 0 and 1 to keep only 'good' indexes.
        SizeT local_count=0;
        for (i=start_index; i<stop_index; ++i) {
          bool tmp=(*this)[i];
          part[thread_id][local_count]=i;
          local_count+=tmp;
        }
        partialCount[thread_id]=local_count;
      } //end parallel section
      //total count is sum of partial counts:
      SizeT count=0;
      for (int iloop=0; iloop<nchunk; ++iloop) count+=partialCount[iloop];
      // allocate final result
      if (count > 0) {
        ret=(DLong64*)MALLOC(count * sizeof (DLong64)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCount[iloop]; ++jj) ret[running_index++]=part[iloop][jj];
        }
      } else ret=NULL; //mirror the single-thread path
      passed_count=count;
      //free temporary arrays.
      for (int iloop=0; iloop<nchunk; ++iloop) {
        FREE(part[iloop]);
      }
    }
  }
}
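
// The DLong specialization below implements the same algorithm as the DLong64
// one above; only the type of the returned indices (32-bit DLong) differs.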
template<>
void Data_<Sp>::Where(DLong* &ret, SizeT &passed_count, bool comp, DLong* &comp_ret) {
  SizeT nEl=this->N_Elements();
  //code is optimized for 1 thread (no threading) and for the presence or absence of the complement.
  int nchunk=(nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))?CpuTPOOL_NTHREADS:1;
  if (comp) {
    if (nchunk==1) {
      DLong* yes = (DLong*)MALLOC(nEl*sizeof(DLong));
      DLong* no = (DLong*)MALLOC(nEl*sizeof(DLong));
      // computational magic of 0 and 1 to keep only 'good' indexes.
      SizeT countyes=0;
      SizeT countno=0;
      for (SizeT i=0; i<nEl; ++i) {
        bool tmp=(*this)[i];
        yes[countyes]=i;
        no[countno]=i;
        countyes+=tmp;
        countno+=(!tmp);
      }
      passed_count=countyes;
      if (countyes) ret=(DLong*)REALLOC(yes,countyes * sizeof (DLong)); else {FREE(yes); ret=NULL;}
      if (countno) comp_ret=(DLong*)REALLOC(no,countno * sizeof (DLong)); else {FREE(no); comp_ret=NULL;}
      return;
    } else {
      SizeT chunksize = nEl / nchunk;
      DLong* partyes[nchunk];
      DLong* partno[nchunk];
      SizeT partialCountYes[nchunk];
      SizeT partialCountNo[nchunk];
#pragma omp parallel num_threads(nchunk) //shared(partialCount,part) //if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))
      {
        int thread_id = currentThreadNumber();
        SizeT start_index, stop_index;
        start_index = thread_id * chunksize;
        if (thread_id != nchunk-1) { //robust whether threading is actually in use or not.
          stop_index = start_index + chunksize;
        } else {
          stop_index = nEl;
        }

        SizeT space=(stop_index-start_index)*sizeof(DLong); //chunk size in bytes
        SizeT i=0;
        //compute a [0] or [1] series
        // allocate this thread's subset of the final result (space is already in bytes)
        partyes[thread_id] = (DLong*)MALLOC(space);
        partno[thread_id] = (DLong*)MALLOC(space);
        // computational magic of 0 and 1 to keep only 'good' indexes.
        SizeT local_count_yes=0;
        SizeT local_count_no=0;
        for (i=start_index; i<stop_index; ++i) {
          bool tmp=(*this)[i];
          partyes[thread_id][local_count_yes]=i;
          partno[thread_id][local_count_no]=i;
          local_count_yes+=tmp;
          local_count_no+=(!tmp);
        }
        partialCountYes[thread_id]=local_count_yes;
        partialCountNo[thread_id]=local_count_no;
      } //end parallel section
      //total count is sum of partial counts:
      SizeT countyes=0;
      SizeT countno=0;
      for (int iloop=0; iloop<nchunk; ++iloop) countyes+=partialCountYes[iloop];
      for (int iloop=0; iloop<nchunk; ++iloop) countno+=partialCountNo[iloop];
      // allocate final result
      if (countyes>0) {
        ret=(DLong*)MALLOC(countyes * sizeof (DLong)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCountYes[iloop]; ++jj) ret[running_index++]=partyes[iloop][jj];
        }
      } else ret=NULL; //mirror the single-thread path
      if (countno>0) {
        comp_ret=(DLong*)MALLOC(countno * sizeof (DLong)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCountNo[iloop]; ++jj) comp_ret[running_index++]=partno[iloop][jj];
        }
      } else comp_ret=NULL; //mirror the single-thread path
      passed_count=countyes;
      //free temporary arrays.
      for (int iloop=0; iloop<nchunk; ++iloop) {
        FREE(partyes[iloop]);
        FREE(partno[iloop]);
      }
    }
  } else {
    if (nchunk==1) {
      DLong* yes = (DLong*)MALLOC(nEl*sizeof(DLong));
      // computational magic of 0 and 1 to keep only 'good' indexes.
      SizeT count=0;
      for (SizeT i=0; i<nEl; ++i) {
        bool tmp=(*this)[i];
        yes[count]=i;
        count+=tmp;
      }
      passed_count=count;
      if (count) ret=(DLong*)REALLOC(yes,count * sizeof (DLong)); else {FREE(yes); ret=NULL;}
      return;
    } else {
      SizeT chunksize = nEl / nchunk;
      DLong* part[nchunk];
      SizeT partialCount[nchunk];
#pragma omp parallel num_threads(nchunk) //shared(partialCount,part) //if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl))
      {
        int thread_id = currentThreadNumber();
        SizeT start_index, stop_index;
        start_index = thread_id * chunksize;
        if (thread_id != nchunk-1) { //robust whether threading is actually in use or not.
          stop_index = start_index + chunksize;
        } else {
          stop_index = nEl;
        }

        SizeT space=(stop_index-start_index)*sizeof(DLong); //chunk size in bytes
        SizeT i=0;
        //compute a [0] or [1] series
        // allocate this thread's subset of the final result (space is already in bytes)
        part[thread_id] = (DLong*)MALLOC(space);
        // computational magic of 0 and 1 to keep only 'good' indexes.
        SizeT local_count=0;
        for (i=start_index; i<stop_index; ++i) {
          bool tmp=(*this)[i];
          part[thread_id][local_count]=i;
          local_count+=tmp;
        }
        partialCount[thread_id]=local_count;
      } //end parallel section
      //total count is sum of partial counts:
      SizeT count=0;
      for (int iloop=0; iloop<nchunk; ++iloop) count+=partialCount[iloop];
      // allocate final result
      if (count > 0) {
        ret=(DLong*)MALLOC(count * sizeof (DLong)); //allocate, no zeroing needed
        //fill in
        SizeT running_index=0;
        for (int iloop=0; iloop<nchunk; ++iloop) {
          for (SizeT jj=0; jj<partialCount[iloop]; ++jj) ret[running_index++]=part[iloop][jj];
        }
      } else ret=NULL; //mirror the single-thread path
      passed_count=count;
      //free temporary arrays.
      for (int iloop=0; iloop<nchunk; ++iloop) {
        FREE(part[iloop]);
      }
    }
  }
}