1 #define R_NO_REMAP
2 #include <R.h>
3 #include <Rinternals.h>
4 #include <stdbool.h>
5 
6 // Scalar operators -------------------------------------------------------
7 
8 // IEEE 754 defines binary64 as
9 // * 1  bit : sign
10 // * 11 bits: exponent
11 // * 52 bits: significand
12 //
13 // R stores the value "1954" in the last 32 bits: this payload marks
14 // the value as a NA, not a regular NaN.
15 //
16 // (Note that this discussion like most discussion of FP on the web, assumes
17 // a big-endian architecture - in little endian the sign bit is the last
18 // bit)
19 
20 typedef union {
21   double value;           // 8 bytes
22   char byte[8];           // 8 * 1 bytes
23 } ieee_double;
24 
25 
26 #ifdef WORDS_BIGENDIAN
27 // First two bytes are sign & expoonent
28 // Last four bytes are 1954
29 const int TAG_BYTE = 3;
30 #else
31 const int TAG_BYTE = 4;
32 #endif
33 
make_tagged_na(char x)34 double make_tagged_na(char x) {
35   ieee_double y;
36 
37   y.value = NA_REAL;
38   y.byte[TAG_BYTE] = x;
39 
40   return y.value;
41 }
42 
tagged_na_value(double x)43 char tagged_na_value(double x) {
44   ieee_double y;
45   y.value = x;
46 
47   return y.byte[TAG_BYTE];
48 }
49 
first_char(SEXP x)50 char first_char(SEXP x) {
51   if (TYPEOF(x) != CHARSXP)
52     return '\0';
53 
54   if (x == NA_STRING)
55     return '\0';
56 
57   return CHAR(x)[0];
58 }
59 
60 // Vectorised wrappers -----------------------------------------------------
61 
tagged_na_(SEXP x)62 SEXP tagged_na_(SEXP x) {
63   if (TYPEOF(x) != STRSXP)
64     Rf_errorcall(R_NilValue, "`x` must be a character vector");
65 
66   int n = Rf_length(x);
67   SEXP out = PROTECT(Rf_allocVector(REALSXP, n));
68 
69   for (int i = 0; i < n; ++i) {
70     char xi = first_char(STRING_ELT(x, i));
71     REAL(out)[i] = make_tagged_na(xi);
72   }
73 
74   UNPROTECT(1);
75   return out;
76 }
77 
na_tag_(SEXP x)78 SEXP na_tag_(SEXP x) {
79   if (TYPEOF(x) != REALSXP)
80     Rf_errorcall(R_NilValue, "`x` must be a double vector");
81 
82   int n = Rf_length(x);
83   SEXP out = PROTECT(Rf_allocVector(STRSXP, n));
84 
85   for (int i = 0; i < n; ++i) {
86     double xi = REAL(x)[i];
87 
88     if (!isnan(xi)) {
89       SET_STRING_ELT(out, i, NA_STRING);
90     } else {
91       char tag = tagged_na_value(xi);
92       if (tag == '\0') {
93         SET_STRING_ELT(out, i, NA_STRING);
94       } else {
95         SET_STRING_ELT(out, i, Rf_mkCharLenCE(&tag, 1, CE_UTF8));
96       }
97     }
98   }
99 
100   UNPROTECT(1);
101   return out;
102 }
103 
falses(int n)104 SEXP falses(int n) {
105   SEXP out = PROTECT(Rf_allocVector(LGLSXP, n));
106   for (int i = 0; i < n; ++i)
107     LOGICAL(out)[i] = 0;
108 
109   UNPROTECT(1);
110   return out;
111 }
112 
is_tagged_na_(SEXP x,SEXP tag_)113 SEXP is_tagged_na_(SEXP x, SEXP tag_) {
114   if (TYPEOF(x) != REALSXP) {
115     return falses(Rf_length(x));
116   }
117 
118   bool has_tag;
119   char check_tag;
120   if (TYPEOF(tag_) == NILSXP) {
121     has_tag = false;
122     check_tag = '\0';
123   } else if (TYPEOF(tag_) == STRSXP) {
124     if (Rf_length(tag_) != 1)
125       Rf_errorcall(R_NilValue, "`tag` must be a character vector of length 1");
126     has_tag = true;
127     check_tag = first_char(STRING_ELT(tag_, 0));
128   } else {
129     Rf_errorcall(R_NilValue, "`tag` must be NULL or a character vector");
130   }
131 
132   int n = Rf_length(x);
133   SEXP out = PROTECT(Rf_allocVector(LGLSXP, n));
134 
135   for (int i = 0; i < n; ++i) {
136     double xi = REAL(x)[i];
137 
138     if (!isnan(xi)) {
139       LOGICAL(out)[i] = false;
140     } else {
141       char tag = tagged_na_value(xi);
142       if (tag == '\0') {
143         LOGICAL(out)[i] = false;
144       } else {
145         if (has_tag) {
146           LOGICAL(out)[i] = tag == check_tag;
147         } else {
148           LOGICAL(out)[i] = true;
149         }
150       }
151     }
152   }
153 
154   UNPROTECT(1);
155   return out;
156 }
157