1 static char rcsid[] = "$Id: merge-diagonals-heap.c 222445 2020-04-21 17:53:32Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "merge-diagonals-heap.h"
7 #include "assert.h"
8 #include "mem.h"
9 #include "list.h"
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h> /* For memcpy */
14
15
16 #ifdef DEBUG
17 #define debug(x) x
18 #else
19 #define debug(x)
20 #endif
21
22 #ifdef DEBUG0
23 #define debug0(x) x
24 #else
25 #define debug0(x)
26 #endif
27
28 #ifdef DEBUG6
29 #define debug6(x) x
30 #else
31 #define debug6(x)
32 #endif
33
34
35 #define PARENT(i) (i >> 1)
36 #define LEFT(i) (i << 1)
37 #define RIGHT(i) ((i << 1) | 1)
38
39 #define GETPOS(high,low) (((Univcoord_T) high << 32) + low)
40
41
42 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
43 static void
min_heap_insert(Univcoord_T ** heap,int heapi,Univcoord_T * diagonals)44 min_heap_insert (Univcoord_T **heap, int heapi, Univcoord_T *diagonals) {
45 int i;
46 Univcoord_T diagonal;
47
48 i = heapi;
49 diagonal = diagonals[0];
50 while (i > 1 && (*(heap[PARENT(i)]) > diagonal)) {
51 heap[i] = heap[PARENT(i)];
52 i = PARENT(i);
53 }
54 heap[i] = diagonals;
55
56 return;
57 }
58 #endif
59
60
61 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
62 /* Provide ancestori as inserti */
63 static void
heapify(Univcoord_T ** heap,Univcoord_T * diagonals,int heapsize)64 heapify (Univcoord_T **heap, Univcoord_T *diagonals
65 #ifdef DEBUG6
66 , int heapsize
67 #endif
68 ) {
69 Univcoord_T diagonal;
70 int inserti, smallesti, righti;
71 #ifdef DEBUG6
72 int i;
73 Univcoord_T *ptr;
74 #endif
75
76 diagonal = *diagonals;
77
78 debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
79 #ifdef DEBUG6
80 for (i = 1; i <= heapsize; i++) {
81 printf("%d: ",i);
82 ptr = heap[i];
83 while (*ptr < (Univcoord_T) -1) {
84 printf(" %u",*ptr);
85 ptr++;
86 }
87 printf("\n");
88 }
89 printf("\n");
90 #endif
91
92 inserti = 1;
93 smallesti = (*(heap[3]) < *(heap[2])) ? 3 : 2;
94 debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
95 2,3,(unsigned long long) *(heap[2]),(unsigned long long) *(heap[3])));
96 while (diagonal > *(heap[smallesti])) {
97 heap[inserti] = heap[smallesti];
98 inserti = smallesti;
99 smallesti = LEFT(inserti);
100 righti = smallesti+1;
101 debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
102 smallesti,righti,(unsigned long long) *(heap[smallesti]),
103 (unsigned long long) *(heap[righti])));
104 if (*(heap[righti]) < *(heap[smallesti])) {
105 smallesti = righti;
106 }
107 }
108 heap[inserti] = diagonals;
109 debug6(printf("Inserting at %d\n\n",inserti));
110 return;
111 }
112 #endif
113
114
115 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
116 /* No sorting. Does not use SIMD merge, so non-destructive. However,
117 we need to add a sentinel to the end of each stream. */
118 static Univcoord_T **
make_diagonals_heap(List_T * free_list,unsigned char ** stream_high_array,UINT4 ** stream_low_array,int * streamsize_array,int * diagterm_array,int nstreams)119 make_diagonals_heap (List_T *free_list, unsigned char **stream_high_array, UINT4 **stream_low_array,
120 int *streamsize_array, int *diagterm_array, int nstreams) {
121 unsigned char *stream_high;
122 UINT4 *stream_low;
123 Univcoord_T **heap, *copy, *storage, diagterm;
124 int heapsize, heapi;
125 int nelts, l;
126 int streami;
127
128
129 heapsize = 2*nstreams + 1;
130 heap = (Univcoord_T **) CALLOC((heapsize + 1),sizeof(Univcoord_T *));
131
132 debug6(printf("nstreams %d, heapsize %d, heap defined from 0..%d\n",
133 nstreams,heapsize,heapsize));
134
135 *free_list = (List_T) NULL;
136 heapi = 1;
137 for (streami = 0; streami < nstreams; streami++) {
138 stream_high = stream_high_array[streami];
139 stream_low = stream_low_array[streami];
140 nelts = streamsize_array[streami];
141 diagterm = diagterm_array[streami];
142
143 copy = (Univcoord_T *) MALLOC((nelts+1)*sizeof(Univcoord_T));
144 *free_list = List_push(*free_list,(void *) copy);
145
146 for (l = 0; l < nelts; l++) {
147 copy[l] = GETPOS(stream_high[l],stream_low[l]) + diagterm;
148 }
149
150 copy[nelts] = (Univcoord_T) -1; /* sentinel */
151
152 min_heap_insert(heap,heapi,copy);
153 heapi++;
154 }
155
156 /* Set up rest of heap */
157 storage = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
158 storage[0] = (Univcoord_T) -1; /* sentinel */
159 *free_list = List_push(*free_list,(void *) storage);
160 while (heapi <= heapsize) {
161 heap[heapi] = storage;
162 heapi++;
163 }
164
165 return heap;
166 }
167 #endif
168
169
170 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
171 /* Output: Not aligned, unlike SIMD version */
172 Univcoord_T *
Merge_diagonals_large(int * nelts,unsigned char ** stream_high_array,UINT4 ** stream_low_array,int * streamsize_array,int * diagterm_array,int nstreams)173 Merge_diagonals_large (int *nelts, unsigned char **stream_high_array, UINT4 **stream_low_array,
174 int *streamsize_array, int *diagterm_array, int nstreams) {
175 unsigned char *stream_high;
176 UINT4 *stream_low;
177 Univcoord_T *result, *out, **heap, *diagonals, diagonal, diagterm;
178 int streami;
179 List_T free_list, p;
180 int l;
181
182 if (nstreams == 0) {
183 *nelts = 0;
184 return (Univcoord_T *) NULL;
185
186 } else if (nstreams == 1) {
187 *nelts = streamsize_array[0];
188 stream_high = stream_high_array[0];
189 stream_low = stream_low_array[0];
190 diagterm = diagterm_array[0];
191
192 result = MALLOC((*nelts)*sizeof(Univcoord_T));
193 for (l = 0; l < *nelts; l++) {
194 result[l] = GETPOS(stream_high[l],stream_low[l]) + diagterm;
195 }
196
197 return result;
198
199 } else {
200 *nelts = 0;
201 for (streami = 0; streami < nstreams; streami++) {
202 *nelts += streamsize_array[streami];
203 }
204 out = result = MALLOC((*nelts)*sizeof(Univcoord_T));
205
206 heap = make_diagonals_heap(&free_list,stream_high_array,stream_low_array,
207 streamsize_array,diagterm_array,nstreams);
208
209 while ((diagonal = *(heap[1])) < (Univcoord_T) -1) {
210 *out++ = diagonal;
211 diagonals = ++(heap[1]); /* Advance pointer */
212 #ifdef DEBUG6
213 heapify(heap,diagonals,/*heapsize*/2*nstreams+1);
214 #else
215 heapify(heap,diagonals);
216 #endif
217 }
218
219 for (p = free_list; p != NULL; p = List_next(p)) {
220 diagonals = (Univcoord_T *) List_head(p);
221 FREE(diagonals);
222 }
223 List_free(&free_list);
224 FREE(heap);
225
226 return result;
227 }
228
229 }
230 #endif
231
232
233 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
234 /* No sorting. Does not use SIMD merge, so non-destructive. However,
235 we need to add a sentinel to the end of each stream. */
236 static Univcoord_T **
make_univdiagonals_heap(List_T * free_list,Univcoord_T ** stream_array,int * streamsize_array,int nstreams)237 make_univdiagonals_heap (List_T *free_list, Univcoord_T **stream_array,
238 int *streamsize_array, int nstreams) {
239 Univcoord_T *stream;
240 Univcoord_T **heap, *copy, *storage;
241 int heapsize, heapi;
242 int nelts;
243 int streami;
244
245
246 heapsize = 2*nstreams + 1;
247 heap = (Univcoord_T **) CALLOC((heapsize + 1),sizeof(Univcoord_T *));
248
249 debug6(printf("nstreams %d, heapsize %d, heap defined from 0..%d\n",
250 nstreams,heapsize,heapsize));
251
252 *free_list = (List_T) NULL;
253 heapi = 1;
254 for (streami = 0; streami < nstreams; streami++) {
255 stream = stream_array[streami];
256 nelts = streamsize_array[streami];
257
258 copy = (Univcoord_T *) MALLOC((nelts+1)*sizeof(Univcoord_T));
259 *free_list = List_push(*free_list,(void *) copy);
260
261 memcpy(copy,stream,nelts*sizeof(Univcoord_T));
262 copy[nelts] = (Univcoord_T) -1; /* sentinel */
263
264 min_heap_insert(heap,heapi,copy);
265 heapi++;
266 }
267
268 /* Set up rest of heap */
269 storage = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
270 storage[0] = (Univcoord_T) -1; /* sentinel */
271 *free_list = List_push(*free_list,(void *) storage);
272 while (heapi <= heapsize) {
273 heap[heapi] = storage;
274 heapi++;
275 }
276
277 return heap;
278 }
279 #endif
280
281
282 /* For AVX2 and AVX512 version, see merge-diagonals-simd-uint8.c */
283 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
284 /* Output: Not aligned, unlike SIMD version */
285 Univcoord_T *
Merge_diagonals_uint8(int * nelts,Univcoord_T ** stream_array,int * streamsize_array,int nstreams)286 Merge_diagonals_uint8 (int *nelts, Univcoord_T **stream_array, int *streamsize_array,
287 int nstreams) {
288 Univcoord_T *stream;
289 Univcoord_T *result, *out, **heap, *diagonals, diagonal;
290 List_T free_list, p;
291 int streami;
292
293 if (nstreams == 0) {
294 *nelts = 0;
295 return (Univcoord_T *) NULL;
296
297 } else if (nstreams == 1) {
298 *nelts = streamsize_array[0];
299 stream = stream_array[0];
300
301 result = MALLOC((*nelts)*sizeof(Univcoord_T));
302 memcpy(result,stream,(*nelts)*sizeof(Univcoord_T));
303 return result;
304
305 } else {
306 *nelts = 0;
307 for (streami = 0; streami < nstreams; streami++) {
308 *nelts += streamsize_array[streami];
309 }
310 out = result = MALLOC((*nelts)*sizeof(Univcoord_T));
311
312 heap = make_univdiagonals_heap(&free_list,stream_array,streamsize_array,nstreams);
313
314 while ((diagonal = *(heap[1])) < (Univcoord_T) -1) {
315 *out++ = diagonal;
316 diagonals = ++(heap[1]); /* Advance pointer */
317 #ifdef DEBUG6
318 heapify(heap,diagonals,/*heapsize*/2*nstreams+1);
319 #else
320 heapify(heap,diagonals);
321 #endif
322 }
323
324 for (p = free_list; p != NULL; p = List_next(p)) {
325 diagonals = (Univcoord_T *) List_head(p);
326 FREE(diagonals);
327 }
328 List_free(&free_list);
329 FREE(heap);
330
331 return result;
332 }
333 }
334 #endif
335
336