1 /* libxml2 - Library for parsing XML documents
2  * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3  *
4  * This file is not part of the GNU gettext program, but is used with
5  * GNU gettext.
6  *
7  * The original copyright notice is as follows:
8  */
9 
10 /*
11  * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is fur-
18  * nished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25  * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  */
31 
32 /*
33  * The MIT License (MIT)
34  *
35  * Copyright (c) 2010-2017 Christopher Swenson.
36  * Copyright (c) 2012 Vojtech Fried.
37  * Copyright (c) 2012 Google Inc. All Rights Reserved.
38  *
39  * Permission is hereby granted, free of charge, to any person obtaining a
40  * copy of this software and associated documentation files (the "Software"),
41  * to deal in the Software without restriction, including without limitation
42  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
43  * and/or sell copies of the Software, and to permit persons to whom the
44  * Software is furnished to do so, subject to the following conditions:
45  *
46  * The above copyright notice and this permission notice shall be included in
47  * all copies or substantial portions of the Software.
48  *
49  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
55  * DEALINGS IN THE SOFTWARE.
56  */
57 
58 /*
59  * Taken from https://github.com/swenson/sort
60  * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
61  * Removed all code unrelated to Timsort and made minor adjustments for
62  * cross-platform compatibility.
63  */
64 
65 #include <stdlib.h>
66 #include <stdio.h>
67 #include <string.h>
68 #ifdef HAVE_STDINT_H
69 #include <stdint.h>
70 #elif defined(_WIN32)
71 typedef unsigned __int64 uint64_t;
72 #endif
73 
74 #ifndef SORT_NAME
75 #error "Must declare SORT_NAME"
76 #endif
77 
78 #ifndef SORT_TYPE
79 #error "Must declare SORT_TYPE"
80 #endif
81 
82 #ifndef SORT_CMP
83 #define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
84 #endif
85 
86 #ifndef TIM_SORT_STACK_SIZE
87 #define TIM_SORT_STACK_SIZE 128
88 #endif
89 
90 #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
91 
92 
93 /* Common, type-agnosting functions and constants that we don't want to declare twice. */
94 #ifndef SORT_COMMON_H
95 #define SORT_COMMON_H
96 
97 #ifndef MAX
98 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
99 #endif
100 
101 #ifndef MIN
102 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
103 #endif
104 
105 static int compute_minrun(const uint64_t);
106 
107 #ifndef CLZ
108 #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
109 #define CLZ __builtin_clzll
110 #else
111 
112 static int clzll(uint64_t);
113 
114 /* adapted from Hacker's Delight */
clzll(uint64_t x)115 static int clzll(uint64_t x) {
116   int n;
117 
118   if (x == 0) {
119     return 64;
120   }
121 
122   n = 0;
123 
124   if (x <= 0x00000000FFFFFFFFL) {
125     n = n + 32;
126     x = x << 32;
127   }
128 
129   if (x <= 0x0000FFFFFFFFFFFFL) {
130     n = n + 16;
131     x = x << 16;
132   }
133 
134   if (x <= 0x00FFFFFFFFFFFFFFL) {
135     n = n + 8;
136     x = x << 8;
137   }
138 
139   if (x <= 0x0FFFFFFFFFFFFFFFL) {
140     n = n + 4;
141     x = x << 4;
142   }
143 
144   if (x <= 0x3FFFFFFFFFFFFFFFL) {
145     n = n + 2;
146     x = x << 2;
147   }
148 
149   if (x <= 0x7FFFFFFFFFFFFFFFL) {
150     n = n + 1;
151   }
152 
153   return n;
154 }
155 
156 #define CLZ clzll
157 #endif
158 #endif
159 
compute_minrun(const uint64_t size)160 static __inline int compute_minrun(const uint64_t size) {
161   const int top_bit = 64 - CLZ(size);
162   const int shift = MAX(top_bit, 6) - 6;
163   const int minrun = size >> shift;
164   const uint64_t mask = (1ULL << shift) - 1;
165 
166   if (mask & size) {
167     return minrun + 1;
168   }
169 
170   return minrun;
171 }
172 
173 #endif /* SORT_COMMON_H */
174 
175 #define SORT_CONCAT(x, y) x ## _ ## y
176 #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
177 #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
178 
179 #define BINARY_INSERTION_FIND          SORT_MAKE_STR(binary_insertion_find)
180 #define BINARY_INSERTION_SORT_START    SORT_MAKE_STR(binary_insertion_sort_start)
181 #define BINARY_INSERTION_SORT          SORT_MAKE_STR(binary_insertion_sort)
182 #define REVERSE_ELEMENTS               SORT_MAKE_STR(reverse_elements)
183 #define COUNT_RUN                      SORT_MAKE_STR(count_run)
184 #define CHECK_INVARIANT                SORT_MAKE_STR(check_invariant)
185 #define TIM_SORT                       SORT_MAKE_STR(tim_sort)
186 #define TIM_SORT_RESIZE                SORT_MAKE_STR(tim_sort_resize)
187 #define TIM_SORT_MERGE                 SORT_MAKE_STR(tim_sort_merge)
188 #define TIM_SORT_COLLAPSE              SORT_MAKE_STR(tim_sort_collapse)
189 
190 #ifndef MAX
191 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
192 #endif
193 #ifndef MIN
194 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
195 #endif
196 
197 typedef struct {
198   size_t start;
199   size_t length;
200 } TIM_SORT_RUN_T;
201 
202 
203 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
204 void TIM_SORT(SORT_TYPE *dst, const size_t size);
205 
206 
207 /* Function used to do a binary search for binary insertion sort */
BINARY_INSERTION_FIND(SORT_TYPE * dst,const SORT_TYPE x,const size_t size)208 static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
209     const size_t size) {
210   size_t l, c, r;
211   SORT_TYPE cx;
212   l = 0;
213   r = size - 1;
214   c = r >> 1;
215 
216   /* check for out of bounds at the beginning. */
217   if (SORT_CMP(x, dst[0]) < 0) {
218     return 0;
219   } else if (SORT_CMP(x, dst[r]) > 0) {
220     return r;
221   }
222 
223   cx = dst[c];
224 
225   while (1) {
226     const int val = SORT_CMP(x, cx);
227 
228     if (val < 0) {
229       if (c - l <= 1) {
230         return c;
231       }
232 
233       r = c;
234     } else { /* allow = for stability. The binary search favors the right. */
235       if (r - c <= 1) {
236         return c + 1;
237       }
238 
239       l = c;
240     }
241 
242     c = l + ((r - l) >> 1);
243     cx = dst[c];
244   }
245 }
246 
247 /* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
BINARY_INSERTION_SORT_START(SORT_TYPE * dst,const size_t start,const size_t size)248 static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
249   size_t i;
250 
251   for (i = start; i < size; i++) {
252     size_t j;
253     SORT_TYPE x;
254     size_t location;
255 
256     /* If this entry is already correct, just move along */
257     if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
258       continue;
259     }
260 
261     /* Else we need to find the right place, shift everything over, and squeeze in */
262     x = dst[i];
263     location = BINARY_INSERTION_FIND(dst, x, i);
264 
265     for (j = i - 1; j >= location; j--) {
266       dst[j + 1] = dst[j];
267 
268       if (j == 0) { /* check edge case because j is unsigned */
269         break;
270       }
271     }
272 
273     dst[location] = x;
274   }
275 }
276 
277 /* Binary insertion sort */
BINARY_INSERTION_SORT(SORT_TYPE * dst,const size_t size)278 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
279   /* don't bother sorting an array of size <= 1 */
280   if (size <= 1) {
281     return;
282   }
283 
284   BINARY_INSERTION_SORT_START(dst, 1, size);
285 }
286 
287 /* timsort implementation, based on timsort.txt */
288 
REVERSE_ELEMENTS(SORT_TYPE * dst,size_t start,size_t end)289 static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
290   while (1) {
291     if (start >= end) {
292       return;
293     }
294 
295     SORT_SWAP(dst[start], dst[end]);
296     start++;
297     end--;
298   }
299 }
300 
COUNT_RUN(SORT_TYPE * dst,const size_t start,const size_t size)301 static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
302   size_t curr;
303 
304   if (size - start == 1) {
305     return 1;
306   }
307 
308   if (start >= size - 2) {
309     if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
310       SORT_SWAP(dst[size - 2], dst[size - 1]);
311     }
312 
313     return 2;
314   }
315 
316   curr = start + 2;
317 
318   if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
319     /* increasing run */
320     while (1) {
321       if (curr == size - 1) {
322         break;
323       }
324 
325       if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) {
326         break;
327       }
328 
329       curr++;
330     }
331 
332     return curr - start;
333   } else {
334     /* decreasing run */
335     while (1) {
336       if (curr == size - 1) {
337         break;
338       }
339 
340       if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) {
341         break;
342       }
343 
344       curr++;
345     }
346 
347     /* reverse in-place */
348     REVERSE_ELEMENTS(dst, start, curr - 1);
349     return curr - start;
350   }
351 }
352 
CHECK_INVARIANT(TIM_SORT_RUN_T * stack,const int stack_curr)353 static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
354   size_t A, B, C;
355 
356   if (stack_curr < 2) {
357     return 1;
358   }
359 
360   if (stack_curr == 2) {
361     const size_t A1 = stack[stack_curr - 2].length;
362     const size_t B1 = stack[stack_curr - 1].length;
363 
364     if (A1 <= B1) {
365       return 0;
366     }
367 
368     return 1;
369   }
370 
371   A = stack[stack_curr - 3].length;
372   B = stack[stack_curr - 2].length;
373   C = stack[stack_curr - 1].length;
374 
375   if ((A <= B + C) || (B <= C)) {
376     return 0;
377   }
378 
379   return 1;
380 }
381 
382 typedef struct {
383   size_t alloc;
384   SORT_TYPE *storage;
385 } TEMP_STORAGE_T;
386 
TIM_SORT_RESIZE(TEMP_STORAGE_T * store,const size_t new_size)387 static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
388   if (store->alloc < new_size) {
389     SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));
390 
391     if (tempstore == NULL) {
392       fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
393               (unsigned long)(sizeof(SORT_TYPE) * new_size));
394       exit(1);
395     }
396 
397     store->storage = tempstore;
398     store->alloc = new_size;
399   }
400 }
401 
TIM_SORT_MERGE(SORT_TYPE * dst,const TIM_SORT_RUN_T * stack,const int stack_curr,TEMP_STORAGE_T * store)402 static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
403                            TEMP_STORAGE_T *store) {
404   const size_t A = stack[stack_curr - 2].length;
405   const size_t B = stack[stack_curr - 1].length;
406   const size_t curr = stack[stack_curr - 2].start;
407   SORT_TYPE *storage;
408   size_t i, j, k;
409   TIM_SORT_RESIZE(store, MIN(A, B));
410   storage = store->storage;
411 
412   /* left merge */
413   if (A < B) {
414     memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
415     i = 0;
416     j = curr + A;
417 
418     for (k = curr; k < curr + A + B; k++) {
419       if ((i < A) && (j < curr + A + B)) {
420         if (SORT_CMP(storage[i], dst[j]) <= 0) {
421           dst[k] = storage[i++];
422         } else {
423           dst[k] = dst[j++];
424         }
425       } else if (i < A) {
426         dst[k] = storage[i++];
427       } else {
428         break;
429       }
430     }
431   } else {
432     /* right merge */
433     memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
434     i = B;
435     j = curr + A;
436     k = curr + A + B;
437 
438     while (k-- > curr) {
439       if ((i > 0) && (j > curr)) {
440         if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
441           dst[k] = dst[--j];
442         } else {
443           dst[k] = storage[--i];
444         }
445       } else if (i > 0) {
446         dst[k] = storage[--i];
447       } else {
448         break;
449       }
450     }
451   }
452 }
453 
TIM_SORT_COLLAPSE(SORT_TYPE * dst,TIM_SORT_RUN_T * stack,int stack_curr,TEMP_STORAGE_T * store,const size_t size)454 static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
455                              TEMP_STORAGE_T *store, const size_t size) {
456   while (1) {
457     size_t A, B, C, D;
458     int ABC, BCD, CD;
459 
460     /* if the stack only has one thing on it, we are done with the collapse */
461     if (stack_curr <= 1) {
462       break;
463     }
464 
465     /* if this is the last merge, just do it */
466     if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
467       TIM_SORT_MERGE(dst, stack, stack_curr, store);
468       stack[0].length += stack[1].length;
469       stack_curr--;
470       break;
471     }
472     /* check if the invariant is off for a stack of 2 elements */
473     else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
474       TIM_SORT_MERGE(dst, stack, stack_curr, store);
475       stack[0].length += stack[1].length;
476       stack_curr--;
477       break;
478     } else if (stack_curr == 2) {
479       break;
480     }
481 
482     B = stack[stack_curr - 3].length;
483     C = stack[stack_curr - 2].length;
484     D = stack[stack_curr - 1].length;
485 
486     if (stack_curr >= 4) {
487       A = stack[stack_curr - 4].length;
488       ABC = (A <= B + C);
489     } else {
490       ABC = 0;
491     }
492 
493     BCD = (B <= C + D) || ABC;
494     CD = (C <= D);
495 
496     /* Both invariants are good */
497     if (!BCD && !CD) {
498       break;
499     }
500 
501     /* left merge */
502     if (BCD && !CD) {
503       TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
504       stack[stack_curr - 3].length += stack[stack_curr - 2].length;
505       stack[stack_curr - 2] = stack[stack_curr - 1];
506       stack_curr--;
507     } else {
508       /* right merge */
509       TIM_SORT_MERGE(dst, stack, stack_curr, store);
510       stack[stack_curr - 2].length += stack[stack_curr - 1].length;
511       stack_curr--;
512     }
513   }
514 
515   return stack_curr;
516 }
517 
PUSH_NEXT(SORT_TYPE * dst,const size_t size,TEMP_STORAGE_T * store,const size_t minrun,TIM_SORT_RUN_T * run_stack,size_t * stack_curr,size_t * curr)518 static __inline int PUSH_NEXT(SORT_TYPE *dst,
519                               const size_t size,
520                               TEMP_STORAGE_T *store,
521                               const size_t minrun,
522                               TIM_SORT_RUN_T *run_stack,
523                               size_t *stack_curr,
524                               size_t *curr) {
525   size_t len = COUNT_RUN(dst, *curr, size);
526   size_t run = minrun;
527 
528   if (run > size - *curr) {
529     run = size - *curr;
530   }
531 
532   if (run > len) {
533     BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
534     len = run;
535   }
536 
537   run_stack[*stack_curr].start = *curr;
538   run_stack[*stack_curr].length = len;
539   (*stack_curr)++;
540   *curr += len;
541 
542   if (*curr == size) {
543     /* finish up */
544     while (*stack_curr > 1) {
545       TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
546       run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
547       (*stack_curr)--;
548     }
549 
550     if (store->storage != NULL) {
551       free(store->storage);
552       store->storage = NULL;
553     }
554 
555     return 0;
556   }
557 
558   return 1;
559 }
560 
TIM_SORT(SORT_TYPE * dst,const size_t size)561 void TIM_SORT(SORT_TYPE *dst, const size_t size) {
562   size_t minrun;
563   TEMP_STORAGE_T _store, *store;
564   TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
565   size_t stack_curr = 0;
566   size_t curr = 0;
567 
568   /* don't bother sorting an array of size 1 */
569   if (size <= 1) {
570     return;
571   }
572 
573   if (size < 64) {
574     BINARY_INSERTION_SORT(dst, size);
575     return;
576   }
577 
578   /* compute the minimum run length */
579   minrun = compute_minrun(size);
580   /* temporary storage for merges */
581   store = &_store;
582   store->alloc = 0;
583   store->storage = NULL;
584 
585   if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
586     return;
587   }
588 
589   if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
590     return;
591   }
592 
593   if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
594     return;
595   }
596 
597   while (1) {
598     if (!CHECK_INVARIANT(run_stack, stack_curr)) {
599       stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, store, size);
600       continue;
601     }
602 
603     if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
604       return;
605     }
606   }
607 }
608 
609 #undef SORT_CONCAT
610 #undef SORT_MAKE_STR1
611 #undef SORT_MAKE_STR
612 #undef SORT_NAME
613 #undef SORT_TYPE
614 #undef SORT_CMP
615 #undef TEMP_STORAGE_T
616 #undef TIM_SORT_RUN_T
617 #undef PUSH_NEXT
618 #undef SORT_SWAP
619 #undef SORT_CONCAT
620 #undef SORT_MAKE_STR1
621 #undef SORT_MAKE_STR
622 #undef BINARY_INSERTION_FIND
623 #undef BINARY_INSERTION_SORT_START
624 #undef BINARY_INSERTION_SORT
625 #undef REVERSE_ELEMENTS
626 #undef COUNT_RUN
627 #undef TIM_SORT
628 #undef TIM_SORT_RESIZE
629 #undef TIM_SORT_COLLAPSE
630 #undef TIM_SORT_RUN_T
631 #undef TEMP_STORAGE_T
632