1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 1998-2020 The R Core Team
4 * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
5 * Copyright (C) 2004 The R Foundation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, a copy is available at
19 * https://www.R-project.org/Licenses/
20 */
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <Defn.h> /* => Utils.h with the protos from here; Rinternals.h */
27 #include <Internal.h>
28 #include <Rmath.h>
29 #include <R_ext/RS.h> /* for Calloc/Free */
30 #include <float.h> /* for DBL_MAX */
31 #include <R_ext/Itermacros.h> /* for ITERATE_BY_REGION */
32
33 /*--- Part I: Comparison Utilities ---*/
34
icmp(int x,int y,Rboolean nalast)35 static int icmp(int x, int y, Rboolean nalast)
36 {
37 if (x == NA_INTEGER && y == NA_INTEGER) return 0;
38 if (x == NA_INTEGER)return nalast ? 1 : -1;
39 if (y == NA_INTEGER)return nalast ? -1 : 1;
40 if (x < y) return -1;
41 if (x > y) return 1;
42 return 0;
43 }
44
rcmp(double x,double y,Rboolean nalast)45 static int rcmp(double x, double y, Rboolean nalast)
46 {
47 int nax = ISNAN(x), nay = ISNAN(y);
48 if (nax && nay) return 0;
49 if (nax) return nalast ? 1 : -1;
50 if (nay) return nalast ? -1 : 1;
51 if (x < y) return -1;
52 if (x > y) return 1;
53 return 0;
54 }
55
ccmp(Rcomplex x,Rcomplex y,Rboolean nalast)56 static int ccmp(Rcomplex x, Rcomplex y, Rboolean nalast)
57 {
58 int nax = ISNAN(x.r), nay = ISNAN(y.r);
59 /* compare real parts */
60 if (nax && nay) return 0;
61 if (nax) return nalast ? 1 : -1;
62 if (nay) return nalast ? -1 : 1;
63 if (x.r < y.r) return -1;
64 if (x.r > y.r) return 1;
65 /* compare complex parts */
66 nax = ISNAN(x.i); nay = ISNAN(y.i);
67 if (nax && nay) return 0;
68 if (nax) return nalast ? 1 : -1;
69 if (nay) return nalast ? -1 : 1;
70 if (x.i < y.i) return -1;
71 if (x.i > y.i) return 1;
72
73 return 0; /* equal */
74 }
75
scmp(SEXP x,SEXP y,Rboolean nalast)76 static int scmp(SEXP x, SEXP y, Rboolean nalast)
77 {
78 if (x == NA_STRING && y == NA_STRING) return 0;
79 if (x == NA_STRING) return nalast ? 1 : -1;
80 if (y == NA_STRING) return nalast ? -1 : 1;
81 if (x == y) return 0; /* same string in cache */
82 return Scollate(x, y);
83 }
84
85 #define R_INT_MIN 1 + INT_MIN //INT_MIN is NA_INTEGER
isUnsorted(SEXP x,Rboolean strictly)86 Rboolean isUnsorted(SEXP x, Rboolean strictly)
87 {
88 R_xlen_t n, i;
89 int itmp = INT_MIN; /* this is NA_INTEGER, < all valid nonNA R integer
90 values */
91 double dtmp = R_NegInf; /* R_NegInf is min possible double "value",
92 ***this is <= all nonNA R numeric values but
93 NOT < all nonNA values*** */
94 if (!isVectorAtomic(x))
95 error(_("only atomic vectors can be tested to be sorted"));
96 n = XLENGTH(x);
97 if(n >= 2)
98 switch (TYPEOF(x)) {
99
100 /* NOTE: x must have no NAs {is.na(.) in R};
101 hence be faster than `rcmp()', `icmp()' for these two cases */
102
103 /* The only difference between strictly and not is '>' vs '>='
104 but we want the if() outside the loop */
105
106 /* x can be an ALTREP here provided that its sortedness is unknown,
107 so we use ITERATE_BY_REGION to get the multiple INTEGER calls
108 outside of the tight loop and be ALTREP safe. */
109 case LGLSXP:
110 case INTSXP:
111 if(strictly) {
112 ITERATE_BY_REGION(x, xptr, i, nbatch, int, INTEGER, {
113 /* itmp initialized to INT_MIN which is < all
114 valid nonNA R int values so no need to
115 exclude first iteration
116
117 After first iteration itmp is value at end
118 of last batch */
119 if(itmp >= xptr[0])
120 return TRUE;
121 for(R_xlen_t k = 0; k < nbatch - 1; k++) {
122 if(xptr[k] >= xptr[k+1])
123 return TRUE;
124 }
125 itmp = xptr[nbatch - 1];
126 });
127 } else {
128 ITERATE_BY_REGION(x, xptr, i, nbatch, int, INTEGER, {
129 if(itmp > xptr[0]) //deal with batch barriers
130 return TRUE;
131 for(R_xlen_t k = 0; k < nbatch - 1; k++) {
132 if(xptr[k] > xptr[k+1])
133 return TRUE;
134 }
135 itmp = xptr[nbatch - 1];
136 });
137 }
138 break;
139 case REALSXP:
140 if(strictly) {
141 ITERATE_BY_REGION(x, xptr, i, nbatch, double, REAL, {
142 /* first element could be R_NegInf which is
143 initialization value of dtmp so don't do
144 the barrier check at i == 0 since its >=
145 here
146
147 After first iteration dtmp is value at end
148 of last batch */
149 if(i > 0 && dtmp >= xptr[0]) //deal with batch barriers
150 return TRUE;
151 for(R_xlen_t k = 0; k < nbatch - 1; k++) {
152 if(xptr[k] >= xptr[k+1])
153 return TRUE;
154 }
155 dtmp = xptr[nbatch - 1];
156 });
157 } else {
158 /* nonstrict, first element can never be < dtmp (== R_NegInf),
159 so no need to exclude first iteration in barrier check */
160 ITERATE_BY_REGION(x, xptr, i, nbatch, double, REAL, {
161 if(dtmp > xptr[0]) /* deal with batch barriers, dtmp
162 is end of last batch */
163 return TRUE;
164 for(R_xlen_t k = 0; k < nbatch - 1; k++) {
165 if(xptr[k] > xptr[k+1])
166 return TRUE;
167 }
168 dtmp = xptr[nbatch - 1];
169 });
170 }
171 break;
172 case CPLXSXP:
173 if(strictly) {
174 for(i = 0; i+1 < n ; i++)
175 if(ccmp(COMPLEX(x)[i], COMPLEX(x)[i+1], TRUE) >= 0)
176 return TRUE;
177 } else {
178 for(i = 0; i+1 < n ; i++)
179 if(ccmp(COMPLEX(x)[i], COMPLEX(x)[i+1], TRUE) > 0)
180 return TRUE;
181 }
182 break;
183 case STRSXP:
184 if(strictly) {
185 for(i = 0; i+1 < n ; i++)
186 if(scmp(STRING_ELT(x, i ),
187 STRING_ELT(x,i+1), TRUE) >= 0)
188 return TRUE;
189 } else {
190 for(i = 0; i+1 < n ; i++)
191 if(scmp(STRING_ELT(x, i ),
192 STRING_ELT(x,i+1), TRUE) > 0)
193 return TRUE;
194 }
195 break;
196 case RAWSXP: // being compatible with raw_relop() in ./relop.c
197 if(strictly) {
198 for(i = 0; i+1 < n ; i++)
199 if(RAW(x)[i] >= RAW(x)[i+1])
200 return TRUE;
201
202 } else {
203 for(i = 0; i+1 < n ; i++)
204 if(RAW(x)[i] > RAW(x)[i+1])
205 return TRUE;
206 }
207 break;
208 default:
209 UNIMPLEMENTED_TYPE("isUnsorted", x);
210 }
211 return FALSE;/* sorted */
212 } // isUnsorted()
213
214 #define FIRST_LAST_DIFF(x, type) type##_ELT(x, 0) != type##_ELT(x, XLENGTH(x) - 1)
215 /* assumes no NAs, which is the case for do_isunsorted, they're removed in R code */
216 #define SORTED_VEC_NONCONST(x) (XLENGTH(x) > 1 && \
217 ((TYPEOF(x) == INTSXP && FIRST_LAST_DIFF(x, INTEGER)) || \
218 (TYPEOF(x) == REALSXP && FIRST_LAST_DIFF(x, REAL))))
219
do_isunsorted(SEXP call,SEXP op,SEXP args,SEXP rho)220 SEXP attribute_hidden do_isunsorted(SEXP call, SEXP op, SEXP args, SEXP rho)
221 {
222 if (length(args) == 2) {
223 /* This allows for old calls of the form
224 .Internal(is.unsorted(x, strictly)) prior to adding the
225 na.rm argument in order to match the closure signature to
226 the signature expected by methods called in
227 DispatchOrEval. These old calls might have been captured in
228 closures or S4 generic definitions. This code inserts a
229 value for the na.rm argument in the list of evaluated
230 arguments. The first cell of args is protected when
231 do_isunsorted is called. */
232 SEXP tmp = CONS_NR(R_FalseValue, CDR(args));
233 SETCDR(args, tmp);
234 }
235 checkArity(op, args);
236
237 SEXP ans, x = CAR(args);
238 /* DispatchOrEval internal generic: is.unsorted */
239 if(DispatchOrEval(call, op, "is.unsorted", args, rho, &ans, 0, 1))
240 return ans;
241 PROTECT(args = ans); // args evaluated now
242
243 int sorted = UNKNOWN_SORTEDNESS;
244 switch(TYPEOF(x)) {
245 case INTSXP:
246 sorted = INTEGER_IS_SORTED(x);
247 break;
248 case REALSXP:
249 sorted = REAL_IS_SORTED(x);
250 break;
251 default:
252 break;
253 }
254
255 SEXP strictlyArg = CADDR(args);
256 /* right now is.unsorted only tells you if something is sorted ascending
257 hopefully someday it will work for descending too */
258 if(!asLogical(strictlyArg)) { /*not strict since we don't memoize that */
259 if(KNOWN_INCR(sorted)) {
260 UNPROTECT(1);
261 return ScalarLogical(FALSE);
262 }
263 /* is.unsorted returns TRUE for vectors sorted in descending order
264 iff the vector has more than 1 unique value */
265 else if(KNOWN_DECR(sorted)) {
266 UNPROTECT(1);
267 return ScalarLogical(SORTED_VEC_NONCONST(x));
268 } else if (sorted == KNOWN_UNSORTED) {
269 UNPROTECT(1);
270 return ScalarLogical(TRUE);
271 }
272 }
273
274 int strictly = asLogical(strictlyArg);
275 if(strictly == NA_LOGICAL)
276 error(_("invalid '%s' argument"), "strictly");
277 if(isVectorAtomic(x)) {
278 UNPROTECT(1);
279 return (xlength(x) < 2) ? ScalarLogical(FALSE) :
280 ScalarLogical(isUnsorted(x, strictly));
281 }
282 if(isObject(x)) {
283 SEXP call;
284 PROTECT(call = // R> .gtn(x, strictly) :
285 lang3(install(".gtn"), x, strictlyArg));
286 ans = eval(call, rho);
287 UNPROTECT(2);
288 return ans;
289 }
290 else {
291 UNPROTECT(1);
292 return ScalarLogical(NA_LOGICAL);
293 }
294 }
295
296
297 /*--- Part II: Complete (non-partial) Sorting ---*/
298
299
300 /* SHELLsort -- corrected from R. Sedgewick `Algorithms in C'
301 * (version of BDR's lqs():*/
302 #define sort_body(TYPE_CMP, TYPE_PROT, TYPE_UNPROT) \
303 Rboolean nalast=TRUE; \
304 int i, j, h; \
305 \
306 for (h = 1; h <= n / 9; h = 3 * h + 1); \
307 for (; h > 0; h /= 3) \
308 for (i = h; i < n; i++) { \
309 v = TYPE_PROT(x[i]); \
310 j = i; \
311 while (j >= h && TYPE_CMP(x[j - h], v, nalast) > 0) \
312 { x[j] = x[j - h]; j -= h; } \
313 x[j] = v; \
314 TYPE_UNPROT; \
315 }
316
R_isort(int * x,int n)317 void R_isort(int *x, int n)
318 {
319 int v;
320 sort_body(icmp,,)
321 }
322
R_rsort(double * x,int n)323 void R_rsort(double *x, int n)
324 {
325 double v;
326 sort_body(rcmp,,)
327 }
328
R_csort(Rcomplex * x,int n)329 void R_csort(Rcomplex *x, int n)
330 {
331 Rcomplex v;
332 sort_body(ccmp,,)
333 }
334
335 /* used in platform.c */
ssort(SEXP * x,int n)336 void attribute_hidden ssort(SEXP *x, int n)
337 {
338 SEXP v;
339 sort_body(scmp,PROTECT,UNPROTECT(1))
340 }
341
rsort_with_index(double * x,int * indx,int n)342 void rsort_with_index(double *x, int *indx, int n)
343 {
344 double v;
345 int i, j, h, iv;
346
347 for (h = 1; h <= n / 9; h = 3 * h + 1);
348 for (; h > 0; h /= 3)
349 for (i = h; i < n; i++) {
350 v = x[i]; iv = indx[i];
351 j = i;
352 while (j >= h && rcmp(x[j - h], v, TRUE) > 0)
353 { x[j] = x[j - h]; indx[j] = indx[j-h]; j -= h; }
354 x[j] = v; indx[j] = iv;
355 }
356 }
357
revsort(double * a,int * ib,int n)358 void revsort(double *a, int *ib, int n)
359 {
360 /* Sort a[] into descending order by "heapsort";
361 * sort ib[] alongside;
362 * if initially, ib[] = 1...n, it will contain the permutation finally
363 */
364
365 int l, j, ir, i;
366 double ra;
367 int ii;
368
369 if (n <= 1) return;
370
371 a--; ib--;
372
373 l = (n >> 1) + 1;
374 ir = n;
375
376 for (;;) {
377 if (l > 1) {
378 l = l - 1;
379 ra = a[l];
380 ii = ib[l];
381 }
382 else {
383 ra = a[ir];
384 ii = ib[ir];
385 a[ir] = a[1];
386 ib[ir] = ib[1];
387 if (--ir == 1) {
388 a[1] = ra;
389 ib[1] = ii;
390 return;
391 }
392 }
393 i = l;
394 j = l << 1;
395 while (j <= ir) {
396 if (j < ir && a[j] > a[j + 1]) ++j;
397 if (ra > a[j]) {
398 a[i] = a[j];
399 ib[i] = ib[j];
400 j += (i = j);
401 }
402 else
403 j = ir + 1;
404 }
405 a[i] = ra;
406 ib[i] = ii;
407 }
408 }
409
410
do_sort(SEXP call,SEXP op,SEXP args,SEXP rho)411 SEXP attribute_hidden do_sort(SEXP call, SEXP op, SEXP args, SEXP rho)
412 {
413 SEXP ans;
414 Rboolean decreasing;
415
416 checkArity(op, args);
417
418 decreasing = asLogical(CADR(args));
419 if(decreasing == NA_LOGICAL)
420 error(_("'decreasing' must be TRUE or FALSE"));
421 if(CAR(args) == R_NilValue) return R_NilValue;
422 if(!isVectorAtomic(CAR(args)))
423 error(_("only atomic vectors can be sorted"));
424 if(TYPEOF(CAR(args)) == RAWSXP)
425 error(_("raw vectors cannot be sorted"));
426
427 /* we need consistent behaviour here, including dropping attibutes,
428 so as from 2.3.0 we always duplicate. */
429 PROTECT(ans = duplicate(CAR(args)));
430 SET_ATTRIB(ans, R_NilValue); /* this is never called with names */
431 SET_OBJECT(ans, 0); /* we may have just stripped off the class */
432 sortVector(ans, decreasing);
433 UNPROTECT(1);
434 return(ans); /* wrapping with metadata happens at end of sort.int */
435 }
436
fastpass_sortcheck(SEXP x,int wanted)437 Rboolean fastpass_sortcheck(SEXP x, int wanted) {
438 if(!KNOWN_SORTED(wanted))
439 return FALSE;
440
441 int sorted = UNKNOWN_SORTEDNESS;
442 Rboolean noNA = FALSE, done = FALSE;
443
444 switch(TYPEOF(x)) {
445 case INTSXP:
446 sorted = INTEGER_IS_SORTED(x);
447 noNA = INTEGER_NO_NA(x);
448 break;
449 case REALSXP:
450 sorted = REAL_IS_SORTED(x);
451 noNA = REAL_NO_NA(x);
452 break;
453 default:
454 /* keep sorted == UNKNOWN_SORTEDNESS */
455 break;
456 }
457
458 /* we know wanted is not NA_INTEGER or 0 at this point because
459 of the immediate return at the beginning for that case */
460 if(!KNOWN_SORTED(sorted)) {
461 done = FALSE;
462 } else if(sorted == wanted) {
463 done = TRUE;
464 /* if there are no NAs, na.last can be ignored */
465 } else if(noNA && sorted * wanted > 0) {
466 /* same sign, thus same direction of sort */
467 done = TRUE;
468 }
469
470 /* Increasing, usually fairly short, sequences of integers often
471 arise as levels in as.factor. A quick check here allows a fast
472 return in sort.int. */
473 if (! done && TYPEOF(x) == INTSXP && wanted > 0 && ! ALTREP(x)) {
474 R_xlen_t len = XLENGTH(x);
475 if (len > 0) {
476 int *px = INTEGER(x);
477 int last = px[0];
478 if (last != NA_INTEGER) {
479 for (R_xlen_t i = 1; i < len; i++) {
480 int next = px[i];
481 if (next < last || next == NA_INTEGER)
482 return FALSE;
483 else last = next;
484 }
485 return TRUE;
486 }
487 }
488 }
489
490 return done;
491 }
492
makeSortEnum(int decr,int nalast)493 static int makeSortEnum(int decr, int nalast) {
494
495 /* passing decr = NA_INTEGER indicates UNKNOWN_SORTEDNESS. */
496 if (decr == NA_INTEGER)
497 return UNKNOWN_SORTEDNESS;
498
499 if (nalast == NA_INTEGER)
500 nalast = 1; // they were/will be removed so we say they are "last"
501
502 if (decr)
503 return nalast ? SORTED_DECR : SORTED_DECR_NA_1ST;
504 else /* increasing */
505 return nalast ? SORTED_INCR : SORTED_INCR_NA_1ST;
506 }
507
508 /* .Internal(sorted_fpass(x, decr, nalast)) */
do_sorted_fpass(SEXP call,SEXP op,SEXP args,SEXP rho)509 SEXP attribute_hidden do_sorted_fpass(SEXP call, SEXP op, SEXP args, SEXP rho)
510 {
511 checkArity(op, args);
512
513 int decr = asInteger(CADR(args));
514 int nalast = asInteger(CADDR(args));
515 int wanted = makeSortEnum(decr, nalast);
516 SEXP x = PROTECT(CAR(args));
517 Rboolean wassorted = fastpass_sortcheck(x, wanted);
518 UNPROTECT(1);
519 return ScalarLogical(wassorted);
520 }
521
522
523 /* faster versions of shellsort, following Sedgewick (1986) */
524
525 /* c(1, 4^k +3*2^(k-1)+1) */
526 #ifdef LONG_VECTOR_SUPPORT
527 // This goes up to 2^38: extend eventually.
528 #define NI 20
529 static const R_xlen_t incs[NI + 1] = {
530 274878693377L, 68719869953L, 17180065793L, 4295065601L,
531 1073790977L, 268460033L, 67121153L, 16783361L, 4197377L, 1050113L,
532 262913L, 65921L, 16577L, 4193L, 1073L, 281L, 77L, 23L, 8L, 1L, 0L
533 };
534 #else
535 #define NI 16
536 static const int incs[NI + 1] = {
537 1073790977, 268460033, 67121153, 16783361, 4197377, 1050113,
538 262913, 65921, 16577, 4193, 1073, 281, 77, 23, 8, 1, 0
539 };
540 #endif
541
542 #define sort2_body \
543 for (h = incs[t]; t < NI; h = incs[++t]) \
544 for (i = h; i < n; i++) { \
545 v = x[i]; \
546 j = i; \
547 while (j >= h && x[j - h] less v) { x[j] = x[j - h]; j -= h; } \
548 x[j] = v; \
549 }
550
551 /* These are only called with n >= 2 */
R_isort2(int * x,R_xlen_t n,Rboolean decreasing)552 static void R_isort2(int *x, R_xlen_t n, Rboolean decreasing)
553 {
554 int v;
555 R_xlen_t i, j, h, t;
556
557 if (n < 2) error("'n >= 2' is required");
558 for (t = 0; incs[t] > n; t++);
559 if(decreasing)
560 #define less <
561 sort2_body
562 #undef less
563 else
564 #define less >
565 sort2_body
566 #undef less
567 }
568
R_rsort2(double * x,R_xlen_t n,Rboolean decreasing)569 static void R_rsort2(double *x, R_xlen_t n, Rboolean decreasing)
570 {
571 double v;
572 R_xlen_t i, j, h, t;
573
574 if (n < 2) error("'n >= 2' is required");
575 for (t = 0; incs[t] > n; t++);
576 if(decreasing)
577 #define less <
578 sort2_body
579 #undef less
580 else
581 #define less >
582 sort2_body
583 #undef less
584 }
585
R_csort2(Rcomplex * x,R_xlen_t n,Rboolean decreasing)586 static void R_csort2(Rcomplex *x, R_xlen_t n, Rboolean decreasing)
587 {
588 Rcomplex v;
589 R_xlen_t i, j, h, t;
590
591 if (n < 2) error("'n >= 2' is required");
592 for (t = 0; incs[t] > n; t++);
593 for (h = incs[t]; t < NI; h = incs[++t])
594 for (i = h; i < n; i++) {
595 v = x[i];
596 j = i;
597 if(decreasing)
598 while (j >= h && (x[j - h].r < v.r ||
599 (x[j - h].r == v.r && x[j - h].i < v.i)))
600 { x[j] = x[j - h]; j -= h; }
601 else
602 while (j >= h && (x[j - h].r > v.r ||
603 (x[j - h].r == v.r && x[j - h].i > v.i)))
604 { x[j] = x[j - h]; j -= h; }
605 x[j] = v;
606 }
607 }
608
ssort2(SEXP * x,R_xlen_t n,Rboolean decreasing)609 static void ssort2(SEXP *x, R_xlen_t n, Rboolean decreasing)
610 {
611 SEXP v;
612 R_xlen_t i, j, h, t;
613
614 if (n < 2) error("'n >= 2' is required");
615 for (t = 0; incs[t] > n; t++);
616 for (h = incs[t]; t < NI; h = incs[++t])
617 for (i = h; i < n; i++) {
618 v = x[i];
619 j = i;
620 PROTECT(v);
621 if(decreasing)
622 while (j >= h && scmp(x[j - h], v, TRUE) < 0)
623 { x[j] = x[j - h]; j -= h; }
624 else
625 while (j >= h && scmp(x[j - h], v, TRUE) > 0)
626 { x[j] = x[j - h]; j -= h; }
627 x[j] = v;
628 UNPROTECT(1); /* v */
629 }
630 }
631
632 /* The meat of sort.int() */
sortVector(SEXP s,Rboolean decreasing)633 void sortVector(SEXP s, Rboolean decreasing)
634 {
635 R_xlen_t n = XLENGTH(s);
636 if (n >= 2 && (decreasing || isUnsorted(s, FALSE)))
637 switch (TYPEOF(s)) {
638 case LGLSXP:
639 case INTSXP:
640 R_isort2(INTEGER(s), n, decreasing);
641 break;
642 case REALSXP:
643 R_rsort2(REAL(s), n, decreasing);
644 break;
645 case CPLXSXP:
646 R_csort2(COMPLEX(s), n, decreasing);
647 break;
648 case STRSXP:
649 ssort2(STRING_PTR(s), n, decreasing);
650 break;
651 default:
652 UNIMPLEMENTED_TYPE("sortVector", s);
653 }
654 }
655
656
657 /*--- Part III: Partial Sorting ---*/
658
659 /*
660 Partial sort so that x[k] is in the correct place, smaller to left,
661 larger to right
662
663 NOTA BENE: k < n required, and *not* checked here but in do_psort();
664 ----- infinite loop possible otherwise!
665 */
666 #define psort_body \
667 Rboolean nalast=TRUE; \
668 R_xlen_t L, R, i, j; \
669 \
670 for (L = lo, R = hi; L < R; ) { \
671 v = x[k]; \
672 for(i = L, j = R; i <= j;) { \
673 while (TYPE_CMP(x[i], v, nalast) < 0) i++; \
674 while (TYPE_CMP(v, x[j], nalast) < 0) j--; \
675 if (i <= j) { w = x[i]; x[i++] = x[j]; x[j--] = w; }\
676 } \
677 if (j < k) L = i; \
678 if (k < i) R = j; \
679 }
680
681
iPsort2(int * x,R_xlen_t lo,R_xlen_t hi,R_xlen_t k)682 static void iPsort2(int *x, R_xlen_t lo, R_xlen_t hi, R_xlen_t k)
683 {
684 int v, w;
685 #define TYPE_CMP icmp
686 psort_body
687 #undef TYPE_CMP
688 }
689
rPsort2(double * x,R_xlen_t lo,R_xlen_t hi,R_xlen_t k)690 static void rPsort2(double *x, R_xlen_t lo, R_xlen_t hi, R_xlen_t k)
691 {
692 double v, w;
693 #define TYPE_CMP rcmp
694 psort_body
695 #undef TYPE_CMP
696 }
697
cPsort2(Rcomplex * x,R_xlen_t lo,R_xlen_t hi,R_xlen_t k)698 static void cPsort2(Rcomplex *x, R_xlen_t lo, R_xlen_t hi, R_xlen_t k)
699 {
700 Rcomplex v, w;
701 #define TYPE_CMP ccmp
702 psort_body
703 #undef TYPE_CMP
704 }
705
706
sPsort2(SEXP * x,R_xlen_t lo,R_xlen_t hi,R_xlen_t k)707 static void sPsort2(SEXP *x, R_xlen_t lo, R_xlen_t hi, R_xlen_t k)
708 {
709 SEXP v, w;
710 #define TYPE_CMP scmp
711 psort_body
712 #undef TYPE_CMP
713 }
714
715
716 /* Needed for mistaken decision to put these in the API */
iPsort(int * x,int n,int k)717 void iPsort(int *x, int n, int k)
718 {
719 iPsort2(x, 0, n-1, k);
720 }
721
rPsort(double * x,int n,int k)722 void rPsort(double *x, int n, int k)
723 {
724 rPsort2(x, 0, n-1, k);
725 }
726
cPsort(Rcomplex * x,int n,int k)727 void cPsort(Rcomplex *x, int n, int k)
728 {
729 cPsort2(x, 0, n-1, k);
730 }
731
732 /* lo, hi, k are 0-based */
Psort(SEXP x,R_xlen_t lo,R_xlen_t hi,R_xlen_t k)733 static void Psort(SEXP x, R_xlen_t lo, R_xlen_t hi, R_xlen_t k)
734 {
735 /* Rprintf("looking for index %d in (%d, %d)\n", k, lo, hi);*/
736 switch (TYPEOF(x)) {
737 case LGLSXP:
738 case INTSXP:
739 iPsort2(INTEGER(x), lo, hi, k);
740 break;
741 case REALSXP:
742 rPsort2(REAL(x), lo, hi, k);
743 break;
744 case CPLXSXP:
745 cPsort2(COMPLEX(x), lo, hi, k);
746 break;
747 case STRSXP:
748 sPsort2(STRING_PTR(x), lo, hi, k);
749 break;
750 default:
751 UNIMPLEMENTED_TYPE("Psort", x);
752 }
753 }
754
755
756 /* Here ind are 1-based indices passed from R */
757 static void
Psort0(SEXP x,R_xlen_t lo,R_xlen_t hi,R_xlen_t * ind,int nind)758 Psort0(SEXP x, R_xlen_t lo, R_xlen_t hi, R_xlen_t *ind, int nind)
759 {
760 if(nind < 1 || hi - lo < 1) return;
761 if(nind <= 1)
762 Psort(x, lo, hi, ind[0] - 1);
763 else {
764 /* Look for index nearest the centre of the range */
765 int This = 0;
766 R_xlen_t mid = (lo+hi)/2, z;
767 for(int i = 0; i < nind; i++) if(ind[i] - 1 <= mid) This = i;
768 z = ind[This] - 1;
769 Psort(x, lo, hi, z);
770 Psort0(x, lo, z-1, ind, This);
771 Psort0(x, z+1, hi, ind + This + 1, nind - This -1);
772 }
773 }
774
775
776 /* FUNCTION psort(x, indices) */
do_psort(SEXP call,SEXP op,SEXP args,SEXP rho)777 SEXP attribute_hidden do_psort(SEXP call, SEXP op, SEXP args, SEXP rho)
778 {
779 checkArity(op, args);
780 SEXP x = CAR(args), p = CADR(args);
781
782 if (!isVectorAtomic(x))
783 error(_("only atomic vectors can be sorted"));
784 if(TYPEOF(x) == RAWSXP)
785 error(_("raw vectors cannot be sorted"));
786 R_xlen_t n = XLENGTH(x);
787 #ifdef LONG_VECTOR_SUPPORT
788 if(!IS_LONG_VEC(x) || TYPEOF(p) != REALSXP)
789 SETCADR(args, coerceVector(p, INTSXP));
790 p = CADR(args);
791 int nind = LENGTH(p);
792 R_xlen_t *l = (R_xlen_t *) R_alloc(nind, sizeof(R_xlen_t));
793 if (TYPEOF(p) == REALSXP) {
794 double *rl = REAL(p);
795 for (int i = 0; i < nind; i++) {
796 if (!R_FINITE(rl[i])) error(_("NA or infinite index"));
797 l[i] = (R_xlen_t) rl[i];
798 if (l[i] < 1 || l[i] > n)
799 error(_("index %ld outside bounds"), l[i]);
800 }
801 } else {
802 int *il = INTEGER(p);
803 for (int i = 0; i < nind; i++) {
804 if (il[i] == NA_INTEGER) error(_("NA index"));
805 if (il[i] < 1 || il[i] > n)
806 error(_("index %d outside bounds"), il[i]);
807 l[i] = il[i];
808 }
809 }
810 #else
811 SETCADR(args, coerceVector(p, INTSXP));
812 p = CADR(args);
813 int nind = LENGTH(p);
814 int *l = INTEGER(p);
815 for (int i = 0; i < nind; i++) {
816 if (l[i] == NA_INTEGER)
817 error(_("NA index"));
818 if (l[i] < 1 || l[i] > n)
819 error(_("index %d outside bounds"), l[i]);
820 }
821 #endif
822 SETCAR(args, duplicate(x));
823 SET_ATTRIB(CAR(args), R_NilValue); /* remove all attributes */
824 SET_OBJECT(CAR(args), 0); /* and the object bit */
825 Psort0(CAR(args), 0, n - 1, l, nind);
826 return CAR(args);
827 }
828
829
830 /*--- Part IV : Rank & Order ---*/
831
equal(R_xlen_t i,R_xlen_t j,SEXP x,Rboolean nalast,SEXP rho)832 static int equal(R_xlen_t i, R_xlen_t j, SEXP x, Rboolean nalast, SEXP rho)
833 {
834 int c = -1;
835
836 if (isObject(x) && !isNull(rho)) { /* so never any NAs */
837 /* evaluate .gt(x, i, j) */
838 SEXP si, sj, call;
839 PROTECT(si = ScalarInteger((int)i+1));
840 PROTECT(sj = ScalarInteger((int)j+1));
841 PROTECT(call = lang4(install(".gt"), x, si, sj));
842 c = asInteger(eval(call, rho));
843 UNPROTECT(3);
844 } else {
845 switch (TYPEOF(x)) {
846 case LGLSXP:
847 case INTSXP:
848 c = icmp(INTEGER(x)[i], INTEGER(x)[j], nalast);
849 break;
850 case REALSXP:
851 c = rcmp(REAL(x)[i], REAL(x)[j], nalast);
852 break;
853 case CPLXSXP:
854 c = ccmp(COMPLEX(x)[i], COMPLEX(x)[j], nalast);
855 break;
856 case STRSXP:
857 c = scmp(STRING_ELT(x, i), STRING_ELT(x, j), nalast);
858 break;
859 default:
860 UNIMPLEMENTED_TYPE("equal", x);
861 break;
862 }
863 }
864 if (c == 0)
865 return 1;
866 return 0;
867 }
868
greater(R_xlen_t i,R_xlen_t j,SEXP x,Rboolean nalast,Rboolean decreasing,SEXP rho)869 static int greater(R_xlen_t i, R_xlen_t j, SEXP x, Rboolean nalast,
870 Rboolean decreasing, SEXP rho)
871 {
872 int c = -1;
873
874 if (isObject(x) && !isNull(rho)) { /* so never any NAs */
875 /* evaluate .gt(x, i, j) */
876 SEXP si, sj, call;
877 PROTECT(si = ScalarInteger((int)i+1));
878 PROTECT(sj = ScalarInteger((int)j+1));
879 PROTECT(call = lang4(install(".gt"), x, si, sj));
880 c = asInteger(eval(call, rho));
881 UNPROTECT(3);
882 } else {
883 switch (TYPEOF(x)) {
884 case LGLSXP:
885 case INTSXP:
886 c = icmp(INTEGER(x)[i], INTEGER(x)[j], nalast);
887 break;
888 case REALSXP:
889 c = rcmp(REAL(x)[i], REAL(x)[j], nalast);
890 break;
891 case CPLXSXP:
892 c = ccmp(COMPLEX(x)[i], COMPLEX(x)[j], nalast);
893 break;
894 case STRSXP:
895 c = scmp(STRING_ELT(x, i), STRING_ELT(x, j), nalast);
896 break;
897 default:
898 UNIMPLEMENTED_TYPE("greater", x);
899 break;
900 }
901 }
902 if (decreasing) c = -c;
903 if (c > 0 || (c == 0 && j < i)) return 1; else return 0;
904 }
905
906 /* listgreater(): used as greater_sub in orderVector() in do_order(...) */
listgreater(int i,int j,SEXP key,Rboolean nalast,Rboolean decreasing)907 static int listgreater(int i, int j, SEXP key, Rboolean nalast,
908 Rboolean decreasing)
909 {
910 SEXP x;
911 int c = -1;
912
913 while (key != R_NilValue) {
914 x = CAR(key);
915 switch (TYPEOF(x)) {
916 case LGLSXP:
917 case INTSXP:
918 c = icmp(INTEGER(x)[i], INTEGER(x)[j], nalast);
919 break;
920 case REALSXP:
921 c = rcmp(REAL(x)[i], REAL(x)[j], nalast);
922 break;
923 case CPLXSXP:
924 c = ccmp(COMPLEX(x)[i], COMPLEX(x)[j], nalast);
925 break;
926 case STRSXP:
927 c = scmp(STRING_ELT(x, i), STRING_ELT(x, j), nalast);
928 break;
929 default:
930 UNIMPLEMENTED_TYPE("listgreater", x);
931 }
932 if (decreasing) c = -c;
933 if (c > 0)
934 return 1;
935 if (c < 0)
936 return 0;
937 key = CDR(key);
938 }
939 if (c == 0 && i < j) return 0; else return 1;
940 }
941
942
943 #define GREATER_2_SUB_DEF(FNAME, TYPE_1, TYPE_2, CMP_FN_1, CMP_FN_2) \
944 static int FNAME(int i, int j, \
945 TYPE_1 *x, TYPE_2 *y, \
946 Rboolean nalast, Rboolean decreasing) \
947 { \
948 int CMP_FN_1(TYPE_1, TYPE_1, Rboolean); \
949 int CMP_FN_2(TYPE_2, TYPE_2, Rboolean); \
950 \
951 int c = CMP_FN_1(x[i], x[j], nalast); \
952 if(c) { \
953 if (decreasing) c = -c; \
954 if (c > 0) return 1; \
955 /* else: (c < 0) */ return 0; \
956 } \
957 else {/* have a tie in x -- use y[]: */ \
958 c = CMP_FN_2(y[i], y[j], nalast); \
959 if(c) { \
960 if (decreasing) c = -c; \
961 if (c > 0) return 1; \
962 /* else: (c < 0) */ return 0; \
963 } \
964 else { /* tie in both x[] and y[] : */ \
965 if (i < j) return 0; \
966 /* else */ return 1; \
967 } \
968 } \
969 }
970
971 static const int sincs[17] = {
972 1073790977, 268460033, 67121153, 16783361, 4197377, 1050113,
973 262913, 65921, 16577, 4193, 1073, 281, 77, 23, 8, 1, 0
974 };
975
976 // Needs indx set to 0:(n-1) initially :
977 static void
orderVector(int * indx,int n,SEXP key,Rboolean nalast,Rboolean decreasing,int greater_sub (int,int,SEXP,Rboolean,Rboolean))978 orderVector(int *indx, int n, SEXP key, Rboolean nalast,
979 Rboolean decreasing,
980 int greater_sub(int, int, SEXP, Rboolean, Rboolean))
981 {
982 int i, j, h, t;
983 int itmp;
984
985 if (n < 2) return;
986 for (t = 0; sincs[t] > n; t++);
987 for (h = sincs[t]; t < 16; h = sincs[++t]) {
988 R_CheckUserInterrupt();
989 for (i = h; i < n; i++) {
990 itmp = indx[i];
991 j = i;
992 while (j >= h &&
993 greater_sub(indx[j - h], itmp, key, nalast^decreasing,
994 decreasing)) {
995 indx[j] = indx[j - h];
996 j -= h;
997 }
998 indx[j] = itmp;
999 }
1000 }
1001 }
1002
1003 #ifdef LONG_VECTOR_SUPPORT
listgreaterl(R_xlen_t i,R_xlen_t j,SEXP key,Rboolean nalast,Rboolean decreasing)1004 static int listgreaterl(R_xlen_t i, R_xlen_t j, SEXP key, Rboolean nalast,
1005 Rboolean decreasing)
1006 {
1007 SEXP x;
1008 int c = -1;
1009
1010 while (key != R_NilValue) {
1011 x = CAR(key);
1012 switch (TYPEOF(x)) {
1013 case LGLSXP:
1014 case INTSXP:
1015 c = icmp(INTEGER(x)[i], INTEGER(x)[j], nalast);
1016 break;
1017 case REALSXP:
1018 c = rcmp(REAL(x)[i], REAL(x)[j], nalast);
1019 break;
1020 case CPLXSXP:
1021 c = ccmp(COMPLEX(x)[i], COMPLEX(x)[j], nalast);
1022 break;
1023 case STRSXP:
1024 c = scmp(STRING_ELT(x, i), STRING_ELT(x, j), nalast);
1025 break;
1026 default:
1027 UNIMPLEMENTED_TYPE("listgreater", x);
1028 }
1029 if (decreasing) c = -c;
1030 if (c > 0)
1031 return 1;
1032 if (c < 0)
1033 return 0;
1034 key = CDR(key);
1035 }
1036 if (c == 0 && i < j) return 0; else return 1;
1037 }
1038
1039 static void
orderVectorl(R_xlen_t * indx,R_xlen_t n,SEXP key,Rboolean nalast,Rboolean decreasing,int greater_sub (R_xlen_t,R_xlen_t,SEXP,Rboolean,Rboolean))1040 orderVectorl(R_xlen_t *indx, R_xlen_t n, SEXP key, Rboolean nalast,
1041 Rboolean decreasing,
1042 int greater_sub(R_xlen_t, R_xlen_t, SEXP, Rboolean, Rboolean))
1043 {
1044 int t;
1045 R_xlen_t i, j, h;
1046 R_xlen_t itmp;
1047
1048 if (n < 2) return;
1049 for (t = 0; incs[t] > n; t++);
1050 for (h = incs[t]; t < NI; h = incs[++t]) {
1051 R_CheckUserInterrupt();
1052 for (i = h; i < n; i++) {
1053 itmp = indx[i];
1054 j = i;
1055 while (j >= h &&
1056 greater_sub(indx[j - h], itmp, key, nalast^decreasing,
1057 decreasing)) {
1058 indx[j] = indx[j - h];
1059 j -= h;
1060 }
1061 indx[j] = itmp;
1062 }
1063 }
1064 }
1065 #endif
1066
1067 #ifdef UNUSED
1068 #define ORD_2_BODY(FNAME, TYPE_1, TYPE_2, GREATER_2_SUB) \
1069 void FNAME(int *indx, int n, TYPE_1 *x, TYPE_2 *y, \
1070 Rboolean nalast, Rboolean decreasing) \
1071 { \
1072 int t; \
1073 for(t = 0; t < n; t++) indx[t] = t; /* indx[] <- 0:(n-1) */ \
1074 if (n < 2) return; \
1075 for(t = 0; sincs[t] > n; t++); \
1076 for (int h = sincs[t]; t < 16; h = sincs[++t]) \
1077 for (int i = h; i < n; i++) { \
1078 int itmp = indx[i], j = i; \
1079 while (j >= h && \
1080 GREATER_2_SUB(indx[j - h], itmp, x, y, \
1081 nalast^decreasing, decreasing)) { \
1082 indx[j] = indx[j - h]; \
1083 j -= h; \
1084 } \
1085 indx[j] = itmp; \
1086 } \
1087 }
1088
ORD_2_BODY(R_order2double,double,double,double2greater)1089 ORD_2_BODY(R_order2double , double, double, double2greater)
1090 ORD_2_BODY(R_order2int , int, int, int2greater)
1091 ORD_2_BODY(R_order2dbl_int, double, int, dblint2greater)
1092 ORD_2_BODY(R_order2int_dbl, int, double, intdbl2greater)
1093
1094
1095 GREATER_2_SUB_DEF(double2greater, double, double, rcmp, rcmp)
1096 GREATER_2_SUB_DEF(int2greater, int, int, icmp, icmp)
1097 GREATER_2_SUB_DEF(dblint2greater, double, int, rcmp, icmp)
1098 GREATER_2_SUB_DEF(intdbl2greater, int, double, icmp, rcmp)
1099 #endif
1100
1101 #define sort2_with_index \
1102 for (h = sincs[t]; t < 16; h = sincs[++t]) { \
1103 R_CheckUserInterrupt(); \
1104 for (i = lo + h; i <= hi; i++) { \
1105 itmp = indx[i]; \
1106 j = i; \
1107 while (j >= lo + h && less(indx[j - h], itmp)) { \
1108 indx[j] = indx[j - h]; j -= h; } \
1109 indx[j] = itmp; \
1110 } \
1111 }
1112
1113
1114 /* TODO: once LONG_VECTOR_SUPPORT and R_xlen_t belong to the R API,
1115 * ---- also add "long" versions, say,
1116 * R_orderVectorl (R_xlen_t *indx, R_xlen_t n, SEXP arglist, ...)
1117 * R_orderVector1l(R_xlen_t *indx, R_xlen_t n, SEXP arg, ...)
1118 * to the API */
1119
1120 // Usage: R_orderVector(indx, n, Rf_lang2(x,y), nalast, decreasing)
1121 void R_orderVector(int *indx, // must be pre-allocated to length >= n
1122 int n,
1123 SEXP arglist, // <- e.g. Rf_lang2(x,y)
1124 Rboolean nalast, Rboolean decreasing)
1125 {
1126 // idx[] <- 0:(n-1) :
1127 for(int i = 0; i < n; i++) indx[i] = i;
1128 orderVector(indx, n, arglist, nalast, decreasing, listgreater);
1129 return;
1130 }
1131
1132 // Fast version of 1-argument case of R_orderVector()
R_orderVector1(int * indx,int n,SEXP x,Rboolean nalast,Rboolean decreasing)1133 void R_orderVector1(int *indx, int n, SEXP x,
1134 Rboolean nalast, Rboolean decreasing)
1135 {
1136 for(int i = 0; i < n; i++) indx[i] = i;
1137 orderVector1(indx, n, x, nalast, decreasing, R_NilValue);
1138 }
1139
1140
1141
1142 /* Needs indx set to 0:(n-1) initially.
1143 Also used by do_options and ../gnuwin32/extra.c
1144 Called with rho != R_NilValue only from do_rank, when NAs are not involved.
1145 */
1146 void attribute_hidden
orderVector1(int * indx,int n,SEXP key,Rboolean nalast,Rboolean decreasing,SEXP rho)1147 orderVector1(int *indx, int n, SEXP key, Rboolean nalast, Rboolean decreasing,
1148 SEXP rho)
1149 {
1150 int c, i, j, h, t, lo = 0, hi = n-1;
1151 int itmp, *isna = NULL, numna = 0;
1152 int *ix = NULL /* -Wall */;
1153 double *x = NULL /* -Wall */;
1154 Rcomplex *cx = NULL /* -Wall */;
1155 SEXP *sx = NULL /* -Wall */;
1156
1157 if (n < 2) return;
1158 switch (TYPEOF(key)) {
1159 case LGLSXP:
1160 case INTSXP:
1161 ix = INTEGER(key);
1162 break;
1163 case REALSXP:
1164 x = REAL(key);
1165 break;
1166 case STRSXP:
1167 sx = STRING_PTR(key);
1168 break;
1169 case CPLXSXP:
1170 cx = COMPLEX(key);
1171 break;
1172 }
1173
1174 if(isNull(rho)) {
1175 /* First sort NAs to one end */
1176 isna = Calloc(n, int);
1177 switch (TYPEOF(key)) {
1178 case LGLSXP:
1179 case INTSXP:
1180 for (i = 0; i < n; i++) isna[i] = (ix[i] == NA_INTEGER);
1181 break;
1182 case REALSXP:
1183 for (i = 0; i < n; i++) isna[i] = ISNAN(x[i]);
1184 break;
1185 case STRSXP:
1186 for (i = 0; i < n; i++) isna[i] = (sx[i] == NA_STRING);
1187 break;
1188 case CPLXSXP:
1189 for (i = 0; i < n; i++) isna[i] = ISNAN(cx[i].r) || ISNAN(cx[i].i);
1190 break;
1191 default:
1192 UNIMPLEMENTED_TYPE("orderVector1", key);
1193 }
1194 for (i = 0; i < n; i++) numna += isna[i];
1195
1196 if(numna)
1197 switch (TYPEOF(key)) {
1198 case LGLSXP:
1199 case INTSXP:
1200 case REALSXP:
1201 case STRSXP:
1202 case CPLXSXP:
1203 if (!nalast) for (i = 0; i < n; i++) isna[i] = !isna[i];
1204 for (t = 0; sincs[t] > n; t++);
1205 #define less(a, b) (isna[a] > isna[b] || (isna[a] == isna[b] && a > b))
1206 sort2_with_index
1207 #undef less
1208 if (n - numna < 2) {
1209 Free(isna);
1210 return;
1211 }
1212 if (nalast) hi -= numna; else lo += numna;
1213 }
1214 }
1215
1216 /* Shell sort isn't stable, so add test on index */
1217
1218 for (t = 0; sincs[t] > hi-lo+1; t++);
1219
1220 if (isObject(key) && !isNull(rho)) {
1221 /* only reached from do_rank */
1222 #define less(a, b) greater(a, b, key, nalast^decreasing, decreasing, rho)
1223 sort2_with_index
1224 #undef less
1225 } else {
1226 switch (TYPEOF(key)) {
1227 case LGLSXP:
1228 case INTSXP:
1229 if (decreasing) {
1230 #define less(a, b) (ix[a] < ix[b] || (ix[a] == ix[b] && a > b))
1231 sort2_with_index
1232 #undef less
1233 } else {
1234 #define less(a, b) (ix[a] > ix[b] || (ix[a] == ix[b] && a > b))
1235 sort2_with_index
1236 #undef less
1237 }
1238 break;
1239 case REALSXP:
1240 if (decreasing) {
1241 #define less(a, b) (x[a] < x[b] || (x[a] == x[b] && a > b))
1242 sort2_with_index
1243 #undef less
1244 } else {
1245 #define less(a, b) (x[a] > x[b] || (x[a] == x[b] && a > b))
1246 sort2_with_index
1247 #undef less
1248 }
1249 break;
1250 case CPLXSXP:
1251 if (decreasing) {
1252 #define less(a, b) (ccmp(cx[a], cx[b], 0) < 0 || (cx[a].r == cx[b].r && cx[a].i == cx[b].i && a > b))
1253 sort2_with_index
1254 #undef less
1255 } else {
1256 #define less(a, b) (ccmp(cx[a], cx[b], 0) > 0 || (cx[a].r == cx[b].r && cx[a].i == cx[b].i && a > b))
1257 sort2_with_index
1258 #undef less
1259 }
1260 break;
1261 case STRSXP:
1262 if (decreasing)
1263 #define less(a, b) (c = Scollate(sx[a], sx[b]), c < 0 || (c == 0 && a > b))
1264 sort2_with_index
1265 #undef less
1266 else
1267 #define less(a, b) (c = Scollate(sx[a], sx[b]), c > 0 || (c == 0 && a > b))
1268 sort2_with_index
1269 #undef less
1270 break;
1271 default: /* only reached from do_rank */
1272 #define less(a, b) greater(a, b, key, nalast^decreasing, decreasing, rho)
1273 sort2_with_index
1274 #undef less
1275 }
1276 }
1277 if(isna) Free(isna);
1278 }
1279
1280 /* version for long vectors */
1281 #ifdef LONG_VECTOR_SUPPORT
1282 static void
orderVector1l(R_xlen_t * indx,R_xlen_t n,SEXP key,Rboolean nalast,Rboolean decreasing,SEXP rho)1283 orderVector1l(R_xlen_t *indx, R_xlen_t n, SEXP key, Rboolean nalast,
1284 Rboolean decreasing, SEXP rho)
1285 {
1286 R_xlen_t c, i, j, h, t, lo = 0, hi = n-1;
1287 int *isna = NULL, numna = 0;
1288 int *ix = NULL /* -Wall */;
1289 double *x = NULL /* -Wall */;
1290 Rcomplex *cx = NULL /* -Wall */;
1291 SEXP *sx = NULL /* -Wall */;
1292 R_xlen_t itmp;
1293
1294 if (n < 2) return;
1295 switch (TYPEOF(key)) {
1296 case LGLSXP:
1297 case INTSXP:
1298 ix = INTEGER(key);
1299 break;
1300 case REALSXP:
1301 x = REAL(key);
1302 break;
1303 case STRSXP:
1304 sx = STRING_PTR(key);
1305 break;
1306 case CPLXSXP:
1307 cx = COMPLEX(key);
1308 break;
1309 }
1310
1311 if(isNull(rho)) {
1312 /* First sort NAs to one end */
1313 isna = Calloc(n, int);
1314 switch (TYPEOF(key)) {
1315 case LGLSXP:
1316 case INTSXP:
1317 for (i = 0; i < n; i++) isna[i] = (ix[i] == NA_INTEGER);
1318 break;
1319 case REALSXP:
1320 for (i = 0; i < n; i++) isna[i] = ISNAN(x[i]);
1321 break;
1322 case STRSXP:
1323 for (i = 0; i < n; i++) isna[i] = (sx[i] == NA_STRING);
1324 break;
1325 case CPLXSXP:
1326 for (i = 0; i < n; i++) isna[i] = ISNAN(cx[i].r) || ISNAN(cx[i].i);
1327 break;
1328 default:
1329 UNIMPLEMENTED_TYPE("orderVector1", key);
1330 }
1331 for (i = 0; i < n; i++) numna += isna[i];
1332
1333 if(numna)
1334 switch (TYPEOF(key)) {
1335 case LGLSXP:
1336 case INTSXP:
1337 case REALSXP:
1338 case STRSXP:
1339 case CPLXSXP:
1340 if (!nalast) for (i = 0; i < n; i++) isna[i] = !isna[i];
1341 for (t = 0; sincs[t] > n; t++);
1342 #define less(a, b) (isna[a] > isna[b] || (isna[a] == isna[b] && a > b))
1343 sort2_with_index
1344 #undef less
1345 if (n - numna < 2) {
1346 Free(isna);
1347 return;
1348 }
1349 if (nalast) hi -= numna; else lo += numna;
1350 }
1351 }
1352
1353 /* Shell sort isn't stable, so add test on index */
1354
1355 for (t = 0; sincs[t] > hi-lo+1; t++);
1356
1357 if (isObject(key) && !isNull(rho)) {
1358 /* only reached from do_rank */
1359 #define less(a, b) greater(a, b, key, nalast^decreasing, decreasing, rho)
1360 sort2_with_index
1361 #undef less
1362 } else {
1363 switch (TYPEOF(key)) {
1364 case LGLSXP:
1365 case INTSXP:
1366 if (decreasing) {
1367 #define less(a, b) (ix[a] < ix[b] || (ix[a] == ix[b] && a > b))
1368 sort2_with_index
1369 #undef less
1370 } else {
1371 #define less(a, b) (ix[a] > ix[b] || (ix[a] == ix[b] && a > b))
1372 sort2_with_index
1373 #undef less
1374 }
1375 break;
1376 case REALSXP:
1377 if (decreasing) {
1378 #define less(a, b) (x[a] < x[b] || (x[a] == x[b] && a > b))
1379 sort2_with_index
1380 #undef less
1381 } else {
1382 #define less(a, b) (x[a] > x[b] || (x[a] == x[b] && a > b))
1383 sort2_with_index
1384 #undef less
1385 }
1386 break;
1387 case CPLXSXP:
1388 if (decreasing) {
1389 #define less(a, b) (ccmp(cx[a], cx[b], 0) < 0 || (cx[a].r == cx[b].r && cx[a].i == cx[b].i && a > b))
1390 sort2_with_index
1391 #undef less
1392 } else {
1393 #define less(a, b) (ccmp(cx[a], cx[b], 0) > 0 || (cx[a].r == cx[b].r && cx[a].i == cx[b].i && a > b))
1394 sort2_with_index
1395 #undef less
1396 }
1397 break;
1398 case STRSXP:
1399 if (decreasing)
1400 #define less(a, b) (c=Scollate(sx[a], sx[b]), c < 0 || (c == 0 && a > b))
1401 sort2_with_index
1402 #undef less
1403 else
1404 #define less(a, b) (c=Scollate(sx[a], sx[b]), c > 0 || (c == 0 && a > b))
1405 sort2_with_index
1406 #undef less
1407 break;
1408 default: /* only reached from do_rank */
1409 #define less(a, b) greater(a, b, key, nalast^decreasing, decreasing, rho)
1410 sort2_with_index
1411 #undef less
1412 }
1413 }
1414 if(isna) Free(isna);
1415 }
1416 #endif
1417
1418 /* FUNCTION order(...) */
do_order(SEXP call,SEXP op,SEXP args,SEXP rho)1419 SEXP attribute_hidden do_order(SEXP call, SEXP op, SEXP args, SEXP rho)
1420 {
1421 SEXP ap, ans = R_NilValue /* -Wall */;
1422 int narg = 0;
1423 R_xlen_t n = -1;
1424 Rboolean nalast, decreasing;
1425
1426 nalast = asLogical(CAR(args));
1427 if(nalast == NA_LOGICAL)
1428 error(_("invalid '%s' value"), "na.last");
1429 args = CDR(args);
1430 decreasing = asLogical(CAR(args));
1431 if(decreasing == NA_LOGICAL)
1432 error(_("'decreasing' must be TRUE or FALSE"));
1433 args = CDR(args);
1434 if (args == R_NilValue)
1435 return R_NilValue;
1436
1437 if (isVector(CAR(args)))
1438 n = XLENGTH(CAR(args));
1439 for (ap = args; ap != R_NilValue; ap = CDR(ap), narg++) {
1440 if (!isVector(CAR(ap)))
1441 error(_("argument %d is not a vector"), narg + 1);
1442 if (XLENGTH(CAR(ap)) != n)
1443 error(_("argument lengths differ"));
1444 }
1445 /* NB: collation functions such as Scollate might allocate */
1446 if (n != 0) {
1447 if(narg == 1) {
1448 #ifdef LONG_VECTOR_SUPPORT
1449 if (n > INT_MAX) {
1450 PROTECT(ans = allocVector(REALSXP, n));
1451 R_xlen_t *in = (R_xlen_t *) R_alloc(n, sizeof(R_xlen_t));
1452 for (R_xlen_t i = 0; i < n; i++) in[i] = i;
1453 orderVector1l(in, n, CAR(args), nalast, decreasing,
1454 R_NilValue);
1455 for (R_xlen_t i = 0; i < n; i++) REAL(ans)[i] = in[i] + 1;
1456 } else
1457 #endif
1458 {
1459 PROTECT(ans = allocVector(INTSXP, n));
1460 for (R_xlen_t i = 0; i < n; i++) INTEGER(ans)[i] = (int) i;
1461 orderVector1(INTEGER(ans), (int)n, CAR(args), nalast,
1462 decreasing, R_NilValue);
1463 for (R_xlen_t i = 0; i < n; i++) INTEGER(ans)[i]++;
1464 }
1465 } else {
1466 #ifdef LONG_VECTOR_SUPPORT
1467 if (n > INT_MAX) {
1468 PROTECT(ans = allocVector(REALSXP, n));
1469 R_xlen_t *in = (R_xlen_t *) R_alloc(n, sizeof(R_xlen_t));
1470 for (R_xlen_t i = 0; i < n; i++) in[i] = i;
1471 orderVectorl(in, n, CAR(args), nalast, decreasing,
1472 listgreaterl);
1473 for (R_xlen_t i = 0; i < n; i++) REAL(ans)[i] = in[i] + 1;
1474 } else
1475 #endif
1476 {
1477 PROTECT(ans = allocVector(INTSXP, n));
1478 for (R_xlen_t i = 0; i < n; i++) INTEGER(ans)[i] = (int) i;
1479 orderVector(INTEGER(ans), (int) n, args, nalast,
1480 decreasing, listgreater);
1481 for (R_xlen_t i = 0; i < n; i++) INTEGER(ans)[i]++;
1482 }
1483 }
1484 UNPROTECT(1);
1485 return ans;
1486 } else return allocVector(INTSXP, 0);
1487 }
1488
1489 /* FUNCTION: rank(x, length, ties.method) */
do_rank(SEXP call,SEXP op,SEXP args,SEXP rho)1490 SEXP attribute_hidden do_rank(SEXP call, SEXP op, SEXP args, SEXP rho)
1491 {
1492 SEXP rank, x;
1493 int *ik = NULL /* -Wall */;
1494 double *rk = NULL /* -Wall */;
1495 enum {AVERAGE, MAX, MIN} ties_kind = AVERAGE;
1496 Rboolean isLong = FALSE;
1497
1498 checkArity(op, args);
1499 x = CAR(args);
1500 if(TYPEOF(x) == RAWSXP && !isObject(x))
1501 error(_("raw vectors cannot be sorted"));
1502 // n := sn := length(x) :
1503 #ifdef LONG_VECTOR_SUPPORT
1504 SEXP sn = CADR(args);
1505 R_xlen_t n;
1506 if (TYPEOF(sn) == REALSXP) {
1507 double d = REAL(x)[0];
1508 if(ISNAN(d)) error(_("vector size cannot be NA/NaN"));
1509 if(!R_FINITE(d)) error(_("vector size cannot be infinite"));
1510 if(d > R_XLEN_T_MAX) error(_("vector size specified is too large"));
1511 n = (R_xlen_t) d;
1512 if (n < 0) error(_("invalid '%s' value"), "length(xx)");
1513 } else {
1514 int nn = asInteger(sn);
1515 if (nn == NA_INTEGER || nn < 0)
1516 error(_("invalid '%s' value"), "length(xx)");
1517 n = nn;
1518 }
1519 isLong = n > INT_MAX;
1520 #else
1521 int n = asInteger(CADR(args));
1522 if (n == NA_INTEGER || n < 0)
1523 error(_("invalid '%s' value"), "length(xx)");
1524 #endif
1525 const char *ties_str = CHAR(asChar(CADDR(args)));
1526 if(!strcmp(ties_str, "average")) ties_kind = AVERAGE;
1527 else if(!strcmp(ties_str, "max")) ties_kind = MAX;
1528 else if(!strcmp(ties_str, "min")) ties_kind = MIN;
1529 else error(_("invalid ties.method for rank() [should never happen]"));
1530 if (ties_kind == AVERAGE || isLong) {
1531 PROTECT(rank = allocVector(REALSXP, n));
1532 rk = REAL(rank);
1533 } else {
1534 PROTECT(rank = allocVector(INTSXP, n));
1535 ik = INTEGER(rank);
1536 }
1537 if (n > 0) {
1538 #ifdef LONG_VECTOR_SUPPORT
1539 if(isLong) {
1540 R_xlen_t i, j, k;
1541 R_xlen_t *in = (R_xlen_t *) R_alloc(n, sizeof(R_xlen_t));
1542 for (i = 0; i < n; i++) in[i] = i;
1543 orderVector1l(in, n, x, TRUE, FALSE, rho);
1544 for (i = 0; i < n; i = j+1) {
1545 j = i;
1546 while ((j < n - 1) && equal(in[j], in[j + 1], x, TRUE, rho)) j++;
1547 switch(ties_kind) {
1548 case AVERAGE:
1549 for (k = i; k <= j; k++)
1550 rk[in[k]] = (i + j + 2) / 2.;
1551 break;
1552 case MAX:
1553 for (k = i; k <= j; k++) rk[in[k]] = j+1;
1554 break;
1555 case MIN:
1556 for (k = i; k <= j; k++) rk[in[k]] = i+1;
1557 break;
1558 }
1559 }
1560 } else
1561 #endif
1562 {
1563 int i, j, k;
1564 int *in = (int *) R_alloc(n, sizeof(int));
1565 for (i = 0; i < n; i++) in[i] = i;
1566 orderVector1(in, (int) n, x, TRUE, FALSE, rho);
1567 for (i = 0; i < n; i = j+1) {
1568 j = i;
1569 while ((j < n - 1) && equal(in[j], in[j + 1], x, TRUE, rho)) j++;
1570 switch(ties_kind) {
1571 case AVERAGE:
1572 for (k = i; k <= j; k++)
1573 rk[in[k]] = (i + j + 2) / 2.;
1574 break;
1575 case MAX:
1576 for (k = i; k <= j; k++) ik[in[k]] = j+1;
1577 break;
1578 case MIN:
1579 for (k = i; k <= j; k++) ik[in[k]] = i+1;
1580 break;
1581 }
1582 }
1583 }
1584 }
1585 UNPROTECT(1);
1586 return rank;
1587 }
1588
do_xtfrm(SEXP call,SEXP op,SEXP args,SEXP rho)1589 SEXP attribute_hidden do_xtfrm(SEXP call, SEXP op, SEXP args, SEXP rho)
1590 {
1591 SEXP fn, prargs, ans;
1592
1593 checkArity(op, args);
1594 check1arg(args, call, "x");
1595
1596 /* DispatchOrEval internal generic: xtfrm */
1597 if(DispatchOrEval(call, op, "xtfrm", args, rho, &ans, 0, 1)) return ans;
1598 /* otherwise dispatch the default method */
1599 PROTECT(fn = findFun(install("xtfrm.default"), rho));
1600 PROTECT(prargs = promiseArgs(args, R_GlobalEnv));
1601 SET_PRVALUE(CAR(prargs), CAR(args));
1602 ans = applyClosure(call, fn, prargs, rho, R_NilValue);
1603 UNPROTECT(2);
1604 return ans;
1605
1606 }
1607