1 static char rcsid[] = "$Id: merge-diagonals-heap.c 222445 2020-04-21 17:53:32Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "merge-diagonals-heap.h"
7 #include "assert.h"
8 #include "mem.h"
9 #include "list.h"
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>		/* For memcpy */
14 
15 
16 #ifdef DEBUG
17 #define debug(x) x
18 #else
19 #define debug(x)
20 #endif
21 
22 #ifdef DEBUG0
23 #define debug0(x) x
24 #else
25 #define debug0(x)
26 #endif
27 
28 #ifdef DEBUG6
29 #define debug6(x) x
30 #else
31 #define debug6(x)
32 #endif
33 
34 
35 #define PARENT(i) (i >> 1)
36 #define LEFT(i) (i << 1)
37 #define RIGHT(i) ((i << 1) | 1)
38 
39 #define GETPOS(high,low) (((Univcoord_T) high << 32) + low)
40 
41 
42 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
43 static void
min_heap_insert(Univcoord_T ** heap,int heapi,Univcoord_T * diagonals)44 min_heap_insert (Univcoord_T **heap, int heapi, Univcoord_T *diagonals) {
45   int i;
46   Univcoord_T diagonal;
47 
48   i = heapi;
49   diagonal = diagonals[0];
50   while (i > 1 && (*(heap[PARENT(i)]) > diagonal)) {
51     heap[i] = heap[PARENT(i)];
52     i = PARENT(i);
53   }
54   heap[i] = diagonals;
55 
56   return;
57 }
58 #endif
59 
60 
61 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
62 /* Provide ancestori as inserti */
63 static void
heapify(Univcoord_T ** heap,Univcoord_T * diagonals,int heapsize)64 heapify (Univcoord_T **heap, Univcoord_T *diagonals
65 #ifdef DEBUG6
66 	 , int heapsize
67 #endif
68 	 ) {
69   Univcoord_T diagonal;
70   int inserti, smallesti, righti;
71 #ifdef DEBUG6
72   int i;
73   Univcoord_T *ptr;
74 #endif
75 
76   diagonal = *diagonals;
77 
78   debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
79 #ifdef DEBUG6
80   for (i = 1; i <= heapsize; i++) {
81     printf("%d: ",i);
82     ptr = heap[i];
83     while (*ptr < (Univcoord_T) -1) {
84       printf(" %u",*ptr);
85       ptr++;
86     }
87     printf("\n");
88   }
89   printf("\n");
90 #endif
91 
92   inserti = 1;
93   smallesti = (*(heap[3]) < *(heap[2])) ? 3 : 2;
94   debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
95 		2,3,(unsigned long long) *(heap[2]),(unsigned long long) *(heap[3])));
96   while (diagonal > *(heap[smallesti])) {
97     heap[inserti] = heap[smallesti];
98     inserti = smallesti;
99     smallesti = LEFT(inserti);
100     righti = smallesti+1;
101     debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
102 		  smallesti,righti,(unsigned long long) *(heap[smallesti]),
103 		  (unsigned long long) *(heap[righti])));
104     if (*(heap[righti]) < *(heap[smallesti])) {
105       smallesti = righti;
106     }
107   }
108   heap[inserti] = diagonals;
109   debug6(printf("Inserting at %d\n\n",inserti));
110   return;
111 }
112 #endif
113 
114 
115 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
116 /* No sorting.  Does not use SIMD merge, so non-destructive.  However,
117    we need to add a sentinel to the end of each stream. */
118 static Univcoord_T **
make_diagonals_heap(List_T * free_list,unsigned char ** stream_high_array,UINT4 ** stream_low_array,int * streamsize_array,int * diagterm_array,int nstreams)119 make_diagonals_heap (List_T *free_list, unsigned char **stream_high_array, UINT4 **stream_low_array,
120 		     int *streamsize_array, int *diagterm_array, int nstreams) {
121   unsigned char *stream_high;
122   UINT4 *stream_low;
123   Univcoord_T **heap, *copy, *storage, diagterm;
124   int heapsize, heapi;
125   int nelts, l;
126   int streami;
127 
128 
129   heapsize = 2*nstreams + 1;
130   heap = (Univcoord_T **) CALLOC((heapsize + 1),sizeof(Univcoord_T *));
131 
132   debug6(printf("nstreams %d, heapsize %d, heap defined from 0..%d\n",
133 		nstreams,heapsize,heapsize));
134 
135   *free_list = (List_T) NULL;
136   heapi = 1;
137   for (streami = 0; streami < nstreams; streami++) {
138     stream_high = stream_high_array[streami];
139     stream_low = stream_low_array[streami];
140     nelts = streamsize_array[streami];
141     diagterm = diagterm_array[streami];
142 
143     copy = (Univcoord_T *) MALLOC((nelts+1)*sizeof(Univcoord_T));
144     *free_list = List_push(*free_list,(void *) copy);
145 
146     for (l = 0; l < nelts; l++) {
147       copy[l] = GETPOS(stream_high[l],stream_low[l]) + diagterm;
148     }
149 
150     copy[nelts] = (Univcoord_T) -1;	/* sentinel */
151 
152     min_heap_insert(heap,heapi,copy);
153     heapi++;
154   }
155 
156   /* Set up rest of heap */
157   storage = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
158   storage[0] = (Univcoord_T) -1; 	/* sentinel */
159   *free_list = List_push(*free_list,(void *) storage);
160   while (heapi <= heapsize) {
161     heap[heapi] = storage;
162     heapi++;
163   }
164 
165   return heap;
166 }
167 #endif
168 
169 
170 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
171 /* Output: Not aligned, unlike SIMD version */
172 Univcoord_T *
Merge_diagonals_large(int * nelts,unsigned char ** stream_high_array,UINT4 ** stream_low_array,int * streamsize_array,int * diagterm_array,int nstreams)173 Merge_diagonals_large (int *nelts, unsigned char **stream_high_array, UINT4 **stream_low_array,
174 		       int *streamsize_array, int *diagterm_array, int nstreams) {
175   unsigned char *stream_high;
176   UINT4 *stream_low;
177   Univcoord_T *result, *out, **heap, *diagonals, diagonal, diagterm;
178   int streami;
179   List_T free_list, p;
180   int l;
181 
182   if (nstreams == 0) {
183     *nelts = 0;
184     return (Univcoord_T *) NULL;
185 
186   } else if (nstreams == 1) {
187     *nelts = streamsize_array[0];
188     stream_high = stream_high_array[0];
189     stream_low = stream_low_array[0];
190     diagterm = diagterm_array[0];
191 
192     result = MALLOC((*nelts)*sizeof(Univcoord_T));
193     for (l = 0; l < *nelts; l++) {
194       result[l] = GETPOS(stream_high[l],stream_low[l]) + diagterm;
195     }
196 
197     return result;
198 
199   } else {
200     *nelts = 0;
201     for (streami = 0; streami < nstreams; streami++) {
202       *nelts += streamsize_array[streami];
203     }
204     out = result = MALLOC((*nelts)*sizeof(Univcoord_T));
205 
206     heap = make_diagonals_heap(&free_list,stream_high_array,stream_low_array,
207 			       streamsize_array,diagterm_array,nstreams);
208 
209     while ((diagonal = *(heap[1])) < (Univcoord_T) -1) {
210       *out++ = diagonal;
211       diagonals = ++(heap[1]);	/* Advance pointer */
212 #ifdef DEBUG6
213       heapify(heap,diagonals,/*heapsize*/2*nstreams+1);
214 #else
215       heapify(heap,diagonals);
216 #endif
217     }
218 
219     for (p = free_list; p != NULL; p = List_next(p)) {
220       diagonals = (Univcoord_T *) List_head(p);
221       FREE(diagonals);
222     }
223     List_free(&free_list);
224     FREE(heap);
225 
226     return result;
227   }
228 
229 }
230 #endif
231 
232 
233 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
234 /* No sorting.  Does not use SIMD merge, so non-destructive.  However,
235    we need to add a sentinel to the end of each stream. */
236 static Univcoord_T **
make_univdiagonals_heap(List_T * free_list,Univcoord_T ** stream_array,int * streamsize_array,int nstreams)237 make_univdiagonals_heap (List_T *free_list, Univcoord_T **stream_array,
238 			 int *streamsize_array, int nstreams) {
239   Univcoord_T *stream;
240   Univcoord_T **heap, *copy, *storage;
241   int heapsize, heapi;
242   int nelts;
243   int streami;
244 
245 
246   heapsize = 2*nstreams + 1;
247   heap = (Univcoord_T **) CALLOC((heapsize + 1),sizeof(Univcoord_T *));
248 
249   debug6(printf("nstreams %d, heapsize %d, heap defined from 0..%d\n",
250 		nstreams,heapsize,heapsize));
251 
252   *free_list = (List_T) NULL;
253   heapi = 1;
254   for (streami = 0; streami < nstreams; streami++) {
255     stream = stream_array[streami];
256     nelts = streamsize_array[streami];
257 
258     copy = (Univcoord_T *) MALLOC((nelts+1)*sizeof(Univcoord_T));
259     *free_list = List_push(*free_list,(void *) copy);
260 
261     memcpy(copy,stream,nelts*sizeof(Univcoord_T));
262     copy[nelts] = (Univcoord_T) -1;	/* sentinel */
263 
264     min_heap_insert(heap,heapi,copy);
265     heapi++;
266   }
267 
268   /* Set up rest of heap */
269   storage = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
270   storage[0] = (Univcoord_T) -1; 	/* sentinel */
271   *free_list = List_push(*free_list,(void *) storage);
272   while (heapi <= heapsize) {
273     heap[heapi] = storage;
274     heapi++;
275   }
276 
277   return heap;
278 }
279 #endif
280 
281 
282 /* For AVX2 and AVX512 version, see merge-diagonals-simd-uint8.c */
283 #if !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
284 /* Output: Not aligned, unlike SIMD version */
285 Univcoord_T *
Merge_diagonals_uint8(int * nelts,Univcoord_T ** stream_array,int * streamsize_array,int nstreams)286 Merge_diagonals_uint8 (int *nelts, Univcoord_T **stream_array, int *streamsize_array,
287 		       int nstreams) {
288   Univcoord_T *stream;
289   Univcoord_T *result, *out, **heap, *diagonals, diagonal;
290   List_T free_list, p;
291   int streami;
292 
293   if (nstreams == 0) {
294     *nelts = 0;
295     return (Univcoord_T *) NULL;
296 
297   } else if (nstreams == 1) {
298     *nelts = streamsize_array[0];
299     stream = stream_array[0];
300 
301     result = MALLOC((*nelts)*sizeof(Univcoord_T));
302     memcpy(result,stream,(*nelts)*sizeof(Univcoord_T));
303     return result;
304 
305   } else {
306     *nelts = 0;
307     for (streami = 0; streami < nstreams; streami++) {
308       *nelts += streamsize_array[streami];
309     }
310     out = result = MALLOC((*nelts)*sizeof(Univcoord_T));
311 
312     heap = make_univdiagonals_heap(&free_list,stream_array,streamsize_array,nstreams);
313 
314     while ((diagonal = *(heap[1])) < (Univcoord_T) -1) {
315       *out++ = diagonal;
316       diagonals = ++(heap[1]);	/* Advance pointer */
317 #ifdef DEBUG6
318       heapify(heap,diagonals,/*heapsize*/2*nstreams+1);
319 #else
320       heapify(heap,diagonals);
321 #endif
322     }
323 
324     for (p = free_list; p != NULL; p = List_next(p)) {
325       diagonals = (Univcoord_T *) List_head(p);
326       FREE(diagonals);
327     }
328     List_free(&free_list);
329     FREE(heap);
330 
331     return result;
332   }
333 }
334 #endif
335 
336