1 /**
2 * scan_ccns2:
3 * additional filters for scanning credit card numbers.
4 * used by the scan_accts.flex system.
5 */
6
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include "config.h"
#include "scan_ccns2.h"

#include "be13_api/utils.h"
#include "dfxml_cpp/src/hash_t.h"

#include "be13_api/scanner_params.h"
17
/* Debug switch: when non-zero, the RETURN() macro used by valid_ccn() writes the
 * reason for each rejection to std::cerr before returning. */
int scan_ccns2_debug = 0;
19
20
21 /* credit2.cpp:
22 * A filter to scan stdin to stdout, pass through only the lines
23 * that have valid credit-card numbers by our feature detector.
24 */
25
/** Map an ASCII decimal digit character to its numeric value.
 *  No range check is performed: a non-digit yields a value outside 0..9.
 */
inline int digit_val(char cc)
{
    return (int)cc - '0';
}
30
31
/**
 * Extract the decimal digits found in buf[0..len) into digits, which must have
 * room for at least len+1 bytes; the result is always NUL-terminated.
 * Scanning also stops early at a NUL byte in buf.
 *
 * Return 0 if the number of non-digit characters that were skipped is
 *   - 0, or
 *   - 3 and the first digit is '4' or '5' (visa or mastercard), or
 *   - 2 and the first digit is '3' (american express);
 * otherwise return -1.
 */
static int extract_digits_and_test(const char *buf,int len,char *digits)
{
    char *out = digits;                 // write cursor; 'digits' must keep pointing at the first digit
    int nondigit_count = 0;

    /* Test len before dereferencing so that buf[len] is never read. */
    for(; len>0 && *buf; buf++, len--){
        /* <cctype> functions are only defined for unsigned-char values (and EOF) */
        if(isdigit((unsigned char)*buf)){
            *out++ = *buf;
        } else {
            nondigit_count++;
        }
    }
    *out = 0;                           // null-terminate

    if(nondigit_count==0) return 0;
    if((digits[0]=='4' || digits[0]=='5') && nondigit_count==3){
        return 0;                       // visa or mastercard
    }
    if(digits[0]=='3' && nondigit_count==2){
        return 0;                       // american express
    }
    return -1;
}
58
/* Return 1 if every character examined is a hexadecimal digit, 0 otherwise.
 * At most len characters are examined and the scan stops early at a NUL byte,
 * so an empty range yields 1.
 */
static int only_hex_digits(const char *buf,int len)
{
    /* len is tested first so that buf[len] is never read */
    for(; len>0 && *buf; buf++, len--){
        /* isxdigit() is the standard spelling of the BSD-only ishexnumber();
         * the cast keeps bytes >= 0x80 inside the domain of the <cctype> functions. */
        if(!isxdigit((unsigned char)*buf)) return 0;
    }
    return 1;
}
69
/* Return 1 if every character examined is a decimal digit, 0 otherwise.
 * At most len characters are examined and the scan stops early at a NUL byte,
 * so an empty range yields 1.
 */
static int only_dec_digits(const char *buf,int len)
{
    /* len is tested first so that buf[len] is never read */
    for(; len>0 && *buf; buf++, len--){
        /* the cast keeps bytes >= 0x80 inside the domain of the <cctype> functions */
        if(!isdigit((unsigned char)*buf)) return 0;
    }
    return 1;
}
79
80
81 /****************************************************************
82 *** The tests. Note that sense is reversed.
83 ****************************************************************/
84
85
86
/* int ccv1_test(const char *digits)
 * Return 0 if the NUL-terminated digit string follows the
 * Credit Card Number Validation Algorithm Version #1 (the Luhn checksum), -1 if it fails.
 * (Version 2 is a pure database lookup based on the 3 digits on the back panel.)
 *
 * Working from the rightmost digit, every second digit is replaced by its
 * "doubled" value; the number passes when the total is a multiple of 10.
 * A string containing anything other than decimal digits fails.
 */
static int ccv1_test(const char *digits)
{
    /* doubled[d] is 2*d with the decimal digits of the product added together */
    static const int doubled[] = { 0,2,4,6,8,1,3,5,7,9 };
    int chk = 0;
    int double_flag = 0;                // the rightmost digit is taken as-is

    for(size_t i = strlen(digits); i-- > 0; ){
        const int val = digits[i] - '0';
        if(val<0 || val>9) return -1;   // not a digit: would index doubled[] out of range
        chk += double_flag ? doubled[val] : val;
        double_flag = !double_flag;
    }

    return (chk%10)==0 ? 0 : -1;
}
117
/* histogram_test:
 * Compute the histogram of the digits in a NUL-terminated digit string.
 * Return -1 (not valid) if
 *   - any single digit occurs more than 7 times, or
 *   - two or more different digits each occur 5 or more times, or
 *   - the string contains a character that is not a decimal digit;
 * otherwise return 0.
 */
static int histogram_test(const char *digits)
{
    int digit_counts[10] = {0};         // occurrences of each digit 0..9

    for(; *digits; digits++){
        const int val = *digits - '0';
        if(val<0 || val>9) return -1;   // not a digit: would write outside digit_counts[]
        digit_counts[val]++;
    }

    int cntscore = 0;                   // how many digits occur 5 or more times
    for(int i=0; i<10; i++){
        if(digit_counts[i]>7) return -1;
        if(digit_counts[i]>4) cntscore++;
    }
    return cntscore>=2 ? -1 : 0;
}
145
146
147 /*
148 * Called to display strings. The first character is not part of the number.
149 */
150
/** Return the value of the first 4 digits of a buffer, as an integer.
 *  Fewer characters are used if the string ends earlier; an empty string gives 0.
 */
static int int4(const char *cc)
{
    char buf[5] = {0};                  // fully zeroed, so a short input still leaves a terminated string
    for(int i=0;i<4 && cc[i];i++){
        buf[i] = cc[i];
    }
    return atoi(buf);
}
161
/** Return the value of the first 6 digits of a buffer, as an integer.
 *  Fewer characters are used if the string ends earlier; an empty string gives 0.
 */
static int int6(const char *cc)
{
    char buf[7] = {0};                  // fully zeroed, so a short input still leaves a terminated string
    for(int i=0;i<6 && cc[i];i++){
        buf[i] = cc[i];
    }
    return atoi(buf);
}
172
pattern_test(const char * digits)173 static int pattern_test(const char *digits)
174 {
175 int a = int4(digits);
176 int b = int4(digits+4);
177 int c = int4(digits+8);
178 int d = int4(digits+12);
179
180 if(b-a == c-d) return -1; /* something fishy going on... */
181 return 0;
182 }
183
184 /**
185 * return 0 if prefix is okay, -1 if it is not.
186 *
187 * revised prefix test based on Wikipedia bank card number table
188 * http://en.wikipedia.org/wiki/Bank_card_number
189 */
190
prefix_test(const char * digits)191 static int prefix_test(const char *digits)
192 {
193 int len = strlen(digits);
194 int a = int4(digits);
195 int b = int6(digits);
196
197 switch(len){
198 case 13:
199 if(digits[0]=='4') return 0; // Legacy as all 13-digits are deprecated
200 return -1;
201 case 14:
202 if(a>=3000 && a<=3050) return 0; // Diners Club Carte Blanche (DC-CB)
203 if(a>=3600 && a<=3999) return 0; // Diners Club International (DC-Int)
204 return -1;
205 case 15:
206 if(a==2014) return 0; // Diners Club enRoute (DC-eR)
207 if(a==2149) return 0; // Diners Club enRoute (DC-eR)
208 if(a>=3400 && a<=3499) return 0; // American Express (AmEx)
209 if(a>=3700 && a<=3799) return 0; // American Express (AmEx)
210 return -1;
211 case 16:
212 if(a>=3528 && a<=3589) return 0; // JCB (JCB)
213 if(a>=4000 && a<=4999) return 0; // Visa (Visa)
214 if(b==417500) return 0; // Visa (Visa)
215 if(a>=5100 && a<=5999) return 0; // MasterCard (MC)
216 if(b>=560221 && b<=560225) return 0; // BankCard (BC)
217 if(a==5610) return 0; // BankCard (BC)
218 if(a==6011) return 0; // Discovery (Disc)
219 if(b>=622126 && b<=622925) return 0; // China UnionPay (CUP)
220 if(b>=624000 && b<=626999) return 0; // China UnionPay (CUP)
221 if(b>=628200 && b<=628899) return 0; // China UnionPay (CUP)
222 if(a==6304) return 0; // Laser (Lasr)
223 if(a==6334) return 0; // Solo (Solo)
224 if(a==6706) return 0; // Laser (Lasr)
225 if(a==6709) return 0; // Laser (Lasr)
226 if(a==6767) return 0; // Solo (Solo)
227 if(a==6771) return 0; // Laser (Lasr)
228 if(a>=6440 && a<=6499) return 0; // Discovery (Disc)
229 if(a>=6500 && a<=6599) return 0; // Discovery (Disc)
230 return -1;
231 case 17:
232 if(b>=622126 && b<=622925) return 0; // China UnionPay (CUP)
233 if(b>=624000 && b<=626999) return 0; // China UnionPay (CUP)
234 if(b>=628200 && b<=628899) return 0; // China UnionPay (CUP)
235 if(a==6304) return 0; // Laser (Lasr)
236 if(a==6706) return 0; // Laser (Lasr)
237 if(a==6709) return 0; // Laser (Lasr)
238 if(a==6771) return 0; // Laser (Lasr)
239 return -1;
240 case 18:
241 if(b>=622126 && b<=622925) return 0; // China UnionPay (CUP)
242 if(b>=624000 && b<=626999) return 0; // China UnionPay (CUP)
243 if(b>=628200 && b<=628899) return 0; // China UnionPay (CUP)
244 if(a==6304) return 0; // Laser (Lasr)
245 if(a==6334) return 0; // Solo (Solo)
246 if(a==6706) return 0; // Laser (Lasr)
247 if(a==6709) return 0; // Laser (Lasr)
248 if(a==6767) return 0; // Solo (Solo)
249 if(a==6771) return 0; // Laser (Lasr)
250 return -1;
251 case 19:
252 if(b>=622126 && b<=622925) return 0; // China UnionPay (CUP)
253 if(b>=624000 && b<=626999) return 0; // China UnionPay (CUP)
254 if(b>=628200 && b<=628899) return 0; // China UnionPay (CUP)
255 if(a==6304) return 0; // Laser (Lasr)
256 if(a==6334) return 0; // Solo (Solo)
257 if(a==6706) return 0; // Laser (Lasr)
258 if(a==6709) return 0; // Laser (Lasr)
259 if(a==6767) return 0; // Solo (Solo)
260 if(a==6771) return 0; // Laser (Lasr)
261 return -1;
262 }
263 return -1;
264 }
265
266 #define RETURN(code,reason) {if(scan_ccns2_debug){std::cerr << reason << "\n";} return code;}
267 /**
268 * Determine if this is or is not a credit card number.
269 * Return 1 if it is, 0 if it is not.
270 * buf[-WINDOW_MARGIN] must be accessible.
271 * buf[len+WINDOW_MARGIN] must be accessible
272 */
valid_ccn(const char * buf,int buflen)273 bool valid_ccn(const char *buf,int buflen)
274 {
275 /* Make the digits array */
276 if(buflen>19) RETURN(0,"Too long");
277
278 char digits[20]; // just the digits
279
280 memset(digits,0,sizeof(digits));
281 if(extract_digits_and_test(buf,buflen,digits)) RETURN(0,"failed nondigit count");
282 if(prefix_test(digits)) RETURN(0,"failed prefix test");
283 if(ccv1_test(digits)) RETURN(0,"failed ccv1 test");
284 if(pattern_test(digits)) RETURN(0,"failed pattern test");
285 if(histogram_test(digits)) RETURN(0,"failed histogram test");
286
287 int before_window = 4; // what we care about before
288 int after_window = 4; // what we care about before
289
290 /* If the 4 characters before or after are hex digits but not decimal digits,
291 * then this is probably not a credit card number.
292 * We're probably instead in a sea of hex. So abort.
293 */
294 if(only_hex_digits(buf-before_window,before_window) && !only_dec_digits(buf-before_window,before_window)){
295 RETURN(0,"failed before hex test");
296 }
297 if(only_hex_digits(buf+buflen,after_window) && !only_dec_digits(buf+buflen,after_window)){
298 RETURN(0,"failed after hex test");
299 }
300
301 return 1;
302 }
303
304
/**
 * Throw out phone numbers that are preceded or followed with only
 * numbers and spaces or brackets. These are commonly seen in PDF files
 * when they are decompressed.
 *
 * valid_char() is the character class used for that test: it returns true
 * for a decimal digit, a whitespace character, or one of  [ ] < Z . l j
 */
inline bool valid_char(char ch)
{
    /* <cctype> functions are only defined for unsigned-char values (and EOF) */
    const unsigned char uc = (unsigned char)ch;
    if(isdigit(uc) || isspace(uc)) return true;

    switch(ch){
    case '[': case ']': case '<': case 'Z': case '.': case 'l': case 'j':
        return true;
    default:
        return false;
    }
}
314
valid_phone(const sbuf_t & sbuf,size_t pos,size_t len)315 bool valid_phone(const sbuf_t &sbuf,size_t pos,size_t len)
316 {
317 /* We want invalid characters before and after (assuming there is a before and after */
318 int invalid_before = 0;
319 int invalid_after = 0;
320 if(pos>8){
321 for(size_t i=pos-8;i<pos;i++){
322 if(!valid_char(sbuf[i])) invalid_before = 1;
323 }
324 } else {
325 invalid_before = 1;
326 }
327
328 if(sbuf.bufsize < pos+len+8){
329 for(size_t i=pos+len;i<pos+len+8;i++){
330 if(!valid_char(sbuf[i])) invalid_after = 1;
331 }
332 } else {
333 invalid_after = 1;
334 }
335
336 /*
337 * 2013-05-28: if followed by ' #{1,5} ' then it's not a phone either!
338 */
339 if(pos+len+5 < sbuf.bufsize){
340 if(sbuf[pos+len]==' ' && isdigit(sbuf[pos+len+1])){
341 for(size_t i = pos+len+1 ; (i+1<sbuf.bufsize) && (i<pos+len+8);i++){
342 if(isdigit(sbuf[i]) && sbuf[i+1]==' ') return false; // not valid
343 }
344 }
345 }
346
347 /* If it is followed by a dash and a number, it's not a phone number */
348 if(pos+len+2 < sbuf.bufsize){
349 if(sbuf[pos+len]=='-' && isdigit(sbuf[pos+len+1])) return false;
350 }
351
352 return invalid_before!=0 && invalid_after!=0;
353 }
354
// http://rosettacode.org/wiki/Bitcoin/address_validation#C
static const char *base58_chars =
    "123456789"
    "ABCDEFGHJKLMNPQRSTUVWXYZ"
    "abcdefghijkmnopqrstuvwxyz";
static int base58_vals[256];            // byte value -> base58 digit, or -1 if not in the alphabet
static bool unbase58_built = false;     // set once base58_vals has been filled in

/** Fill in base58_vals from base58_chars. Must be called before unbase58(). */
void build_unbase58()
{
    memset(base58_vals,-1,sizeof(base58_vals));   // all-ones bytes make every int entry -1
    for(size_t i=0;base58_chars[i];i++){
        base58_vals[(unsigned char)(base58_chars[i])] = (int)i;
    }
    unbase58_built = true;
}

/**
 * Decode up to len base58 characters of s (stopping early at a NUL byte) into
 * out as a 25-byte big-endian number. out must have room for 25 bytes.
 *
 * Return false if a character is not in the base58 alphabet or if the value
 * does not fit in 25 bytes; true otherwise.
 * build_unbase58() must have been called first (checked with assert).
 */
bool unbase58(const char *s,uint8_t *out,size_t len)
{
    assert(unbase58_built==true);
    memset(out,0,25);
    /* i<len is tested before s[i] so that s[len] is never read */
    for(size_t i=0;i<len && s[i];i++){
        int c = base58_vals[(unsigned char)(s[i])];
        if (c==-1) return false;        // invalid character
        /* out = out*58 + c, working from the least significant byte upward */
        for (int j = 24; j >= 0; j--) {
            c += 58 * out[j];
            out[j] = c % 256;
            c /= 256;
        }
        if (c!=0) return false;         // address too long
    }
    return true;
}
387
388 // A bitcoin address uses a base58 encoding, which uses an alphabet of the characters 0 .. 9, A ..Z, a .. z,
389 // but without the four characters 0, O, I and l.
valid_bitcoin_address(const char * s,size_t len)390 bool valid_bitcoin_address(const char *s,size_t len){
391 uint8_t dec[32];
392 if (unbase58(s,dec,len)==false) return false;
393 dfxml::sha256_t d1 = dfxml::sha256_generator::hash_buf(dec,21);
394 dfxml::sha256_t d2 = dfxml::sha256_generator::hash_buf(d1.digest,d1.size());
395 if (memcmp(dec+21, d2.digest, 4)!=0){
396 return false;
397 }
398 return true; /* validates */
399 };
400
401
402
403
404
405
#ifdef DEBUG
/**
 * Debug helper: run each individual test on buf[0..buflen), print the
 * result of every one (0 means passed, -1 means failed) and return the
 * verdict of valid_ccn().
 *
 * buf is printed with %s and measured with strlen(), so it must be
 * NUL-terminated. The same accessibility requirements as valid_ccn() apply.
 */
static int validate_ccn_debug(const char *buf,int buflen)
{
    char digits[64];

    /* extract_digits_and_test() writes up to buflen digits plus a terminator */
    if(buflen<0 || buflen>=(int)sizeof(digits)){
        printf("buflen %d is out of range\n",buflen);
        return 0;
    }

    printf("running tests. 0 means passed, -1 means failed.\n\n");
    printf("nondigit_test(%s) = %d\n",buf,extract_digits_and_test(buf,buflen,digits));
    printf("prefix_test(%s) = %d \n",digits,prefix_test(digits));
    printf("ccv1_test(%s) = %d \n",digits,ccv1_test(digits));
    printf("histogram_test(%s) = %d \n",digits,histogram_test(digits));
    printf("pattern_test(%s) = %d \n",digits,pattern_test(digits));
    printf("only_hex_digits(%s) = %d\n",buf,only_hex_digits(buf,(int)strlen(buf)));
    printf("only_dec_digits(%s) = %d\n",buf,only_dec_digits(buf,(int)strlen(buf)));
    return valid_ccn(buf,buflen);       // the public entry point is valid_ccn(); validate_ccn() does not exist
}
#endif
422