1 #include "config.h"
2
3 // if liblightgrep isn't present, compiles to nothing
4 #ifdef HAVE_LIBLIGHTGREP
5
6 #include <algorithm>
7 #include <string>
8
9 #include "be13_api/scanner_params.h"
10 #include "histogram.h"
11 #include "scan_ccns2.h"
12 #include "pattern_scanner.h"
13 #include "pattern_scanner_utils.h"
14
15 namespace accts {
16 const char* const DefaultEncodingsCStrings[] = {"UTF-8", "UTF-16LE"};
17
18 const vector<string> DefaultEncodings(
19 DefaultEncodingsCStrings,
20 DefaultEncodingsCStrings +
21 sizeof(DefaultEncodingsCStrings)/sizeof(DefaultEncodingsCStrings[0])
22 );
23
24 const vector<string> OnlyUTF8Encoding(1, "UTF-8");
25
26 const vector<string> OnlyUTF16LEEncoding(1, "UTF-16LE");
27
28 const LG_KeyOptions DefaultOptions = { 0, 1 }; // patterns, case-insensitive
29
30 //
31 // helper functions
32 //
33
is_pdf_box(const sbuf_t & sbuf,size_t pos)34 bool is_pdf_box(const sbuf_t& sbuf, size_t pos) {
35 const char box[] = "Box";
36 const size_t c0 = pos >= 10 ? pos - 10 : 10 - pos - 1;
37 const uint8_t* i = search(sbuf.buf + c0, sbuf.buf + pos, box, box + strlen(box));
38 return i != sbuf.buf + pos;
39 /*
40 return i != sbuf.buf + pos && (
41 (i + 2 < sbuf.buf + pos && *(i+1) == ' ' && *(i+2) == '[')
42 || *(i+1) == '['
43 );
44 */
45 }
46
// Returns true for characters that may legitimately surround a run of
// digits that is NOT a phone number: digits, whitespace, and the literal
// characters [ ] < Z . l j.
inline bool valid_char(char ch) {
  // The <ctype.h> classifiers require a value representable as unsigned
  // char (or EOF); passing a negative char is undefined behaviour.
  const unsigned char uch = static_cast<unsigned char>(ch);
  return isdigit(uch) || isspace(uch) || ch == '[' || ch == ']' ||
         ch == '<' || ch == 'Z' || ch == '.' || ch == 'l' || ch == 'j';
}
51
valid_phone_utf16le(const sbuf_t & sbuf,size_t pos,size_t len)52 bool valid_phone_utf16le(const sbuf_t& sbuf, size_t pos, size_t len) {
53 // We want invalid characters before and after (assuming there is a
54 // before and after)
55 bool invalid_before = false;
56 bool invalid_after = false;
57
58 if (pos > 16) {
59 for (size_t i = pos-16; i < pos; ++i) {
60 if (sbuf[i] != '\0' && !valid_char(sbuf[i])) {
61 invalid_before = true;
62 break;
63 }
64 }
65 }
66 else {
67 invalid_before = true;
68 }
69
70 if (sbuf.bufsize < pos+len+16) {
71 for (size_t i = pos+len; i < pos+len+16; ++i) {
72 if (sbuf[i] != '\0' && !valid_char(sbuf[i])) {
73 invalid_after = true;
74 break;
75 }
76 }
77 }
78 else {
79 invalid_after = true;
80 }
81
82 /*
83 * 2013-05-28: if followed by ' #{1,5} ' then it's not a phone either!
84 */
85 if (pos+len+10 < sbuf.bufsize) {
86 if (sbuf[pos+len] == ' ' && sbuf[pos+len+1] == '\0' &&
87 isdigit(sbuf[pos+len+2]) && sbuf[pos+len+3] == '\0') {
88 for (size_t i = pos+len+2; i+3 < sbuf.bufsize && i < pos+len+16; i += 2) {
89 if (isdigit(sbuf[i]) && sbuf[i+1] == '\0' &&
90 sbuf[i+2] == ' ' && sbuf[i+3] == '\0') {
91 return false; // not valid
92 }
93 }
94 }
95 }
96
97 /* If it is followed by a dash and a number, it's not a phone number */
98 if (pos+len+4 < sbuf.bufsize) {
99 if (sbuf[pos+len] == '-' && sbuf[pos+len+1] == '\0' &&
100 isdigit(sbuf[pos+len+2] && sbuf[pos+len+3] == '\0')) {
101 return false;
102 }
103 }
104
105 return invalid_before && invalid_after;
106 }
107
108 //
109 // subpatterns
110 //
111
// Trailing context appended to most patterns: either one byte that is not
// '.', a digit, 'E' or 'e', or a '.' followed by one non-digit byte.
// const string END("([^0-9e.]|(\\.[^0-9]))");
const std::string END("([^\\z2E\\z30-\\z39\\z45\\z65]|(\\.[^\\z30-\\z39]))");
const std::string BLOCK("[0-9]{4}");                      // four digits
const std::string DELIM("[- ]");                          // block separator
const std::string DB("(" + BLOCK + DELIM + ")");          // block + separator
const std::string SDB("([45][0-9]{3}" + DELIM + ")");     // first block, starts with 4 or 5
const std::string TDEL("[ /.-]");                         // telephone separator

// Leading context and keyword prefixes for the "tel:"-style phone patterns.
const std::string PHONETEXT_UTF8_CTX("[^\\z41-\\z5A\\z61-\\z7A]");
const std::string PHONETEXT_UTF16LE_CTX("([^\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])");
const std::string PHONETEXT_COMMON("(tel[.ephon]*|fax|facsimile|DSN|telex|TTD|mobile|cell):?");
const std::string PHONETEXT_UTF8("(" + PHONETEXT_UTF8_CTX + PHONETEXT_COMMON + ")");
const std::string PHONETEXT_UTF16LE("(" + PHONETEXT_UTF16LE_CTX + PHONETEXT_COMMON + ")");

// Date components used by the date-of-birth pattern.
const std::string YEAR("(19[0-9][0-9]|20[01][0-9])");
const std::string MONTH("(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?|0?[1-9]|1[0-2])");
const std::string DAY("([0-2]?[0-9]|3[01])");

// Two-digit year and month of the track 2 expiry field (YYMM).
const std::string SYEAR("([0-9][0-9])");
// Was "([01][0-2])", which only accepted 00, 01, 02, 10, 11 and 12 and so
// rejected months 03-09 (including the "=0506101..." example quoted next
// to the track 2 pattern).
const std::string SMONTH("(0[1-9]|1[0-2])");

const std::string DATEA("(" + YEAR + "-" + MONTH + "-" + DAY + ")");
const std::string DATEB("(" + YEAR + "/" + MONTH + "/" + DAY + ")");
const std::string DATEC("(" + DAY + " " + MONTH + " " + YEAR + ")");
const std::string DATED("(" + MONTH + " " + DAY + "[, ]+" + YEAR + ")");

const std::string DATEFORMAT("(" + DATEA + "|" + DATEB + "|" + DATEC + "|" + DATED + ")");
139
140 //
// the scanner
142 //
143
144 class Scanner: public PatternScanner {
145 public:
Scanner()146 Scanner(): PatternScanner("accts_lg"), CCN_Recorder(0), CCN_Track2_Recorder(0), Telephone_Recorder(0), Alert_Recorder(0), PII_Recorder(0), SIN_Recorder(0) {}
~Scanner()147 virtual ~Scanner() {}
148
clone() const149 virtual Scanner* clone() const { return new Scanner(*this); }
150
151 virtual void startup(const scanner_params& sp);
152 virtual void init(const scanner_params& sp);
153 virtual void initScan(const scanner_params&);
154
155 feature_recorder* CCN_Recorder;
156 feature_recorder* CCN_Track2_Recorder;
157 feature_recorder* Telephone_Recorder;
158 feature_recorder* Alert_Recorder;
159 feature_recorder* PII_Recorder;
160 feature_recorder* SIN_Recorder;
161
162 void ccnHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
163
164 void ccnUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
165
166 void ccnTrack2HitHandler(const LG_SearchHit& hit, const scanner_params& sp);
167
168 void ccnTrack2UTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
169
170 void telephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
171
172 void telephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
173
174 void telephoneTrailingCtxHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
175
176 void telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
177
178 void validatedTelephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
179
180 void validatedTelephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
181
182 void bitlockerHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
183
184 void bitlockerUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
185
186 void piiHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
187
188 void piiUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
189
190 void sinHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
191
192 void sinUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
193
194 void sinHitHandler2(const LG_SearchHit& hit, const scanner_params& sp);
195
196 void sinUTF16LEHitHandler2(const LG_SearchHit& hit, const scanner_params& sp);
197
198 void dateHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
199
200 private:
Scanner(const Scanner & s)201 Scanner(const Scanner& s):
202 PatternScanner(s),
203 CCN_Recorder(s.CCN_Recorder),
204 CCN_Track2_Recorder(s.CCN_Track2_Recorder),
205 Telephone_Recorder(s.Telephone_Recorder),
206 Alert_Recorder(s.Alert_Recorder),
207 PII_Recorder(s.PII_Recorder),
208 SIN_Recorder(s.SIN_Recorder)
209 {}
210
211 Scanner& operator=(const Scanner&);
212 };
213
startup(const scanner_params & sp)214 void Scanner::startup(const scanner_params& sp) {
215 sp.check_version();
216
217 sp.info->name = "accts_lg";
218 sp.info->author = "Simson L. Garfinkel, modified by Tim Walsh";
219 sp.info->description = "scans for CCNs, track 2, PII (including SSN and Canadian SIN), and phone #s";
220 sp.info->scanner_version = "1.0";
221
222 // define the feature files this scanner creates
223 sp.info->feature_names.insert("ccn");
224 sp.info->feature_names.insert("pii"); // personally identifiable information
225 sp.info->feature_names.insert("sin"); // canadian social insurance number
226 sp.info->feature_names.insert("ccn_track2");
227 sp.info->feature_names.insert("telephone");
228 sp.info->histogram_defs.insert(histogram_def("ccn", "", "histogram"));
229 sp.info->histogram_defs.insert(histogram_def("ccn_track2", "", "histogram"));
230
231 // define the histograms to make
232 sp.info->histogram_defs.insert(
233 histogram_def("telephone", "", "histogram", HistogramMaker::FLAG_NUMERIC)
234 );
235
236 scan_ccns2_debug = sp.info->config->debug; // get debug value
237 }
238
init(const scanner_params & sp)239 void Scanner::init(const scanner_params& sp) {
240 //
241 // patterns
242 //
243
244 // FIXME: leading context
245 // FIXME: trailing context
246 /* #### #### #### #### --- most credit card numbers*/
247 const string REGEX2("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]" + SDB + DB + DB + BLOCK + END);
248
249 new Handler(
250 *this,
251 REGEX2,
252 OnlyUTF8Encoding,
253 DefaultOptions,
254 &Scanner::ccnHitHandler
255 );
256
257 const string REGEX2_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])" + SDB + DB + DB + BLOCK + END);
258
259 new Handler(
260 *this,
261 REGEX2_UTF16LE,
262 OnlyUTF16LEEncoding,
263 DefaultOptions,
264 &Scanner::ccnUTF16LEHitHandler
265 );
266
267 // FIXME: leading context
268 // FIXME: trailing context
269 /* 3### ###### ######### --- 15 digits beginning with 3 and funny space. */
270 /* Must be american express... */
271 const string REGEX3("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]3[0-9]{3}" + DELIM + "[0-9]{6}" + DELIM + "[0-9]{5}" + END);
272
273 new Handler(
274 *this,
275 REGEX3,
276 OnlyUTF8Encoding,
277 DefaultOptions,
278 &Scanner::ccnHitHandler
279 );
280
281 const string REGEX3_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])3[0-9]{3}" + DELIM + "[0-9]{6}" + DELIM + "[0-9]{5}" + END);
282
283 new Handler(
284 *this,
285 REGEX3_UTF16LE,
286 OnlyUTF16LEEncoding,
287 DefaultOptions,
288 &Scanner::ccnUTF16LEHitHandler
289 );
290
291 // FIXME: leading context
292 // FIXME: trailing context
293 /* 3### ###### ######### --- 15 digits beginning with 3 and funny space. */
294 /* Must be american express... */
295 const string REGEX4("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]3[0-9]{14}" + END);
296
297 new Handler(
298 *this,
299 REGEX4,
300 OnlyUTF8Encoding,
301 DefaultOptions,
302 &Scanner::ccnHitHandler
303 );
304
305 const string REGEX4_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])3[0-9]{14}" + END);
306
307 new Handler(
308 *this,
309 REGEX4_UTF16LE,
310 OnlyUTF16LEEncoding,
311 DefaultOptions,
312 &Scanner::ccnUTF16LEHitHandler
313 );
314
315 // FIXME: leading context
316 // FIXME: trailing context
317 /* ############### 13-19 numbers as a block beginning with a 4 or 5
318 * followed by something that is not a digit.
319 * Yes, CCNs can now be up to 19 digits long.
320 * http://www.creditcards.com/credit-card-news/credit-card-appearance-1268.php
321 */
322 const string REGEX5("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E][4-6][0-9]{15,18}" + END);
323
324 new Handler(
325 *this,
326 REGEX5,
327 OnlyUTF8Encoding,
328 DefaultOptions,
329 &Scanner::ccnHitHandler
330 );
331
332 const string REGEX5_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])[4-6][0-9]{15,18}" + END);
333
334 new Handler(
335 *this,
336 REGEX5_UTF16LE,
337 OnlyUTF16LEEncoding,
338 DefaultOptions,
339 &Scanner::ccnUTF16LEHitHandler
340 );
341
342 // FIXME: leading context
343 /* ;###############=YYMM101#+? --- track2 credit card data */
344 /* {SYEAR}{SMONTH} */
345 /* ;CCN=05061010000000000738? */
346 const string REGEX6("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A][4-6][0-9]{15,18}=" + SYEAR + SMONTH + "101[0-9]{13}");
347
348 new Handler(
349 *this,
350 REGEX6,
351 OnlyUTF8Encoding,
352 DefaultOptions,
353 &Scanner::ccnTrack2HitHandler
354 );
355
356 const string REGEX6_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])[4-6][0-9]{15,18}=" + SYEAR + SMONTH + "101[0-9]{13}");
357
358 new Handler(
359 *this,
360 REGEX6_UTF16LE,
361 OnlyUTF16LEEncoding,
362 DefaultOptions,
363 &Scanner::ccnTrack2UTF16LEHitHandler
364 );
365
366 // FIXME: trailing context
367 // FIXME: leading context
368 /* US phone numbers without area code in parens */
369 /* New addition: If proceeded by " ####? ####? "
370 * then do not consider this a phone number. We see a lot of that stuff in
371 * PDF files.
372 */
373 const string REGEX7("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]([0-9]{3}" + TDEL + "){2}[0-9]{4}" + END);
374
375 new Handler(
376 *this,
377 REGEX7,
378 OnlyUTF8Encoding,
379 DefaultOptions,
380 &Scanner::validatedTelephoneHitHandler
381 );
382
383 const string REGEX7_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])([0-9]{3}" + TDEL + "){2}[0-9]{4}" + END);
384
385 new Handler(
386 *this,
387 REGEX7,
388 OnlyUTF16LEEncoding,
389 DefaultOptions,
390 &Scanner::validatedTelephoneUTF16LEHitHandler
391 );
392
393 // FIXME: trailing context
394 // FIXME: leading context
395 /* US phone number with parens, like (215) 555-1212 */
396 const string REGEX8("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\([0-9]{3}\\)" + TDEL + "?[0-9]{3}" + TDEL + "[0-9]{4}" + END);
397
398 new Handler(
399 *this,
400 REGEX8,
401 OnlyUTF8Encoding,
402 DefaultOptions,
403 &Scanner::telephoneTrailingCtxHitHandler
404 );
405
406 const string REGEX8_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])\\([0-9]{3}\\)" + TDEL + "?[0-9]{3}" + TDEL + "[0-9]{4}" + END);
407
408 new Handler(
409 *this,
410 REGEX8_UTF16LE,
411 OnlyUTF16LEEncoding,
412 DefaultOptions,
413 &Scanner::telephoneTrailingCtxUTF16LEHitHandler
414 );
415
416 // FIXME: trailing context
417 // FIXME: leading context
418 /* Generalized international phone numbers */
419 const string REGEX9("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\+[0-9]{1,3}(" + TDEL + "[0-9]{2,3}){2,6}[0-9]{2,4}" + END);
420
421 new Handler(
422 *this,
423 REGEX9,
424 OnlyUTF8Encoding,
425 DefaultOptions,
426 &Scanner::validatedTelephoneHitHandler
427 );
428
429 const string REGEX9_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])\\+[0-9]{1,3}(" + TDEL + "[0-9]{2,3}){2,6}[0-9]{2,4}" + END);
430
431 new Handler(
432 *this,
433 REGEX9,
434 OnlyUTF16LEEncoding,
435 DefaultOptions,
436 &Scanner::validatedTelephoneHitHandler
437 );
438
439 // FIXME: leading context
440 /* Generalized number with prefix */
441 const string REGEX10(PHONETEXT_UTF8 + "[0-9/ .+]{7,18}");
442
443 new Handler(
444 *this,
445 REGEX10,
446 OnlyUTF8Encoding,
447 DefaultOptions,
448 &Scanner::telephoneHitHandler
449 );
450
451 const string REGEX10_UTF16LE(PHONETEXT_UTF16LE + "[0-9/ .+]{7,18}");
452
453 new Handler(
454 *this,
455 REGEX10_UTF16LE,
456 OnlyUTF16LEEncoding,
457 DefaultOptions,
458 &Scanner::telephoneUTF16LEHitHandler
459 );
460
461 // FIXME: leading context
462 /* Generalized number with city code and prefix */
463 const string REGEX11(PHONETEXT_UTF8 + "[0-9 +]+ ?\\([0-9]{2,4}\\) ?[\\-0-9]{4,8}");
464
465 new Handler(
466 *this,
467 REGEX11,
468 OnlyUTF8Encoding,
469 DefaultOptions,
470 &Scanner::telephoneHitHandler
471 );
472
473 const string REGEX11_UTF16LE(PHONETEXT_UTF16LE + "[0-9 +]+ ?\\([0-9]{2,4}\\) ?[\\-0-9]{4,8}");
474
475 new Handler(
476 *this,
477 REGEX11_UTF16LE,
478 OnlyUTF16LEEncoding,
479 DefaultOptions,
480 &Scanner::telephoneUTF16LEHitHandler
481 );
482
483 // FIXME: trailing context
484 /* Generalized international phone numbers */
485 const string REGEX12("fedex[^a-z]+([0-9]{4}[- ]?){2}[0-9]" + END);
486
487 new Handler(
488 *this,
489 REGEX12,
490 OnlyUTF8Encoding,
491 DefaultOptions,
492 &Scanner::piiHitHandler
493 );
494
495 new Handler(
496 *this,
497 REGEX12,
498 OnlyUTF16LEEncoding,
499 DefaultOptions,
500 &Scanner::piiUTF16LEHitHandler
501 );
502
503 // FIXME: trailing context
504 const string REGEX13("ssn:?[ \\t]+[0-9]{3}-?[0-9]{2}-?[0-9]{4}" + END);
505
506 new Handler(
507 *this,
508 REGEX13,
509 OnlyUTF8Encoding,
510 DefaultOptions,
511 &Scanner::piiHitHandler
512 );
513
514 new Handler(
515 *this,
516 REGEX13,
517 OnlyUTF16LEEncoding,
518 DefaultOptions,
519 &Scanner::piiUTF16LEHitHandler
520 );
521
522 const string REGEX14("dob:?[ \\t]+" + DATEFORMAT);
523
524 new Handler(
525 *this,
526 REGEX14,
527 DefaultEncodings,
528 DefaultOptions,
529 &Scanner::dateHitHandler
530 );
531
532 // FIXME: trailing context
533 const string REGEX15("sin:?[ \\t]+[0-9]{3}[ -]?[0-9]{3}[ -]?[0-9]{3}" + END);
534
535 new Handler(
536 *this,
537 REGEX15,
538 OnlyUTF8Encoding,
539 DefaultOptions,
540 &Scanner::sinHitHandler
541 );
542
543 new Handler(
544 *this,
545 REGEX15,
546 OnlyUTF16LEEncoding,
547 DefaultOptions,
548 &Scanner::sinUTF16LEHitHandler
549 );
550
551 const string REGEX16("[^0-9][0-9]{3}-[0-9]{3}-[0-9]{3}" + END);
552
553 new Handler(
554 *this,
555 REGEX16,
556 OnlyUTF8Encoding,
557 DefaultOptions,
558 &Scanner::sinHitHandler2
559 );
560
561 new Handler(
562 *this,
563 REGEX16,
564 OnlyUTF16LEEncoding,
565 DefaultOptions,
566 &Scanner::sinUTF16LEHitHandler2
567 );
568
569 // FIXME: leading context
570 // FIXME: trailing context
571 /* Possible BitLocker Recovery Key. */
572 const string BITLOCKER("[^\\z30-\\z39]([0-9]{6}-){7}[0-9]{6}[^\\z30-\\z39]");
573
574 new Handler(
575 *this,
576 BITLOCKER,
577 OnlyUTF8Encoding,
578 DefaultOptions,
579 &Scanner::bitlockerHitHandler
580 );
581
582 const string BITLOCKER_UTF16LE("([^\\z30-\\z39]\\z00|[^\\z00])([0-9]{6}-){7}[0-9]{6}[^\\z30-\\z39]");
583
584 new Handler(
585 *this,
586 BITLOCKER,
587 OnlyUTF16LEEncoding,
588 DefaultOptions,
589 &Scanner::bitlockerUTF16LEHitHandler
590 );
591 }
592
initScan(const scanner_params & sp)593 void Scanner::initScan(const scanner_params& sp) {
594 CCN_Recorder = sp.fs.named_feature_recorder("ccn");
595 CCN_Track2_Recorder = sp.fs.named_feature_recorder("ccn_track2");
596 Telephone_Recorder = sp.fs.named_feature_recorder("telephone");
597 Alert_Recorder = sp.fs.get_alert_recorder();
598 PII_Recorder = sp.fs.named_feature_recorder("pii");
599 SIN_Recorder = sp.fs.named_feature_recorder("sin");
600 }
601
ccnHitHandler(const LG_SearchHit & hit,const scanner_params & sp)602 void Scanner::ccnHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
603 const size_t pos = hit.Start + 1;
604 const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
605
606 if (valid_ccn(reinterpret_cast<const char*>(sp.sbuf.buf)+pos, len)) {
607 CCN_Recorder->write_buf(sp.sbuf, pos, len);
608 }
609 }
610
ccnUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)611 void Scanner::ccnUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
612 const size_t pos = hit.Start + (*(sp.sbuf.buf+hit.Start+1) == '\0' ? 2 : 1); const size_t len = hit.End - pos;
613
614 const string ascii(low_utf16le_to_ascii(sp.sbuf.buf+pos, len));
615 if (valid_ccn(ascii.c_str(), ascii.size())) {
616 CCN_Recorder->write_buf(sp.sbuf, pos, len);
617 }
618 }
619
ccnTrack2HitHandler(const LG_SearchHit & hit,const scanner_params & sp)620 void Scanner::ccnTrack2HitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
621 const size_t pos = hit.Start + 1;
622 const size_t len = hit.End - pos;
623
624 if (valid_ccn(reinterpret_cast<const char*>(sp.sbuf.buf)+pos, len)) {
625 CCN_Recorder->write_buf(sp.sbuf, pos, len);
626 }
627 }
628
ccnTrack2UTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)629 void Scanner::ccnTrack2UTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
630 const size_t pos = hit.Start + (*(sp.sbuf.buf+hit.Start+1) == '\0' ? 2 : 1);
631 const size_t len = hit.End - pos;
632
633 const string ascii(low_utf16le_to_ascii(sp.sbuf.buf+pos, len));
634 if (valid_ccn(ascii.c_str(), ascii.size())) {
635 CCN_Recorder->write_buf(sp.sbuf, pos, len);
636 }
637 }
638
telephoneHitHandler(const LG_SearchHit & hit,const scanner_params & sp)639 void Scanner::telephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
640 Telephone_Recorder->write_buf(sp.sbuf, hit.Start+1, hit.End-hit.Start-1);
641 }
642
telephoneUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)643 void Scanner::telephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
644 const size_t start = hit.Start + (*(sp.sbuf.buf + hit.Start + 1) == '\0' ? 2 : 1);
645 const size_t len = hit.End - start;
646
647 Telephone_Recorder->write_buf(sp.sbuf, start, len);
648 }
649
telephoneTrailingCtxHitHandler(const LG_SearchHit & hit,const scanner_params & sp)650 void Scanner::telephoneTrailingCtxHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
651 Telephone_Recorder->write_buf(
652 sp.sbuf,
653 hit.Start+1,
654 hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - (hit.Start+1)
655 );
656 }
657
telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)658 void Scanner::telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
659 Telephone_Recorder->write_buf(
660 sp.sbuf,
661 hit.Start+1,
662 hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) -(hit.Start+1)
663 );
664 }
665
validatedTelephoneHitHandler(const LG_SearchHit & hit,const scanner_params & sp)666 void Scanner::validatedTelephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
667 const size_t pos = hit.Start + 1;
668 const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
669 if (valid_phone(sp.sbuf, pos, len)){
670 if (!is_pdf_box(sp.sbuf, pos)) {
671 Telephone_Recorder->write_buf(sp.sbuf, pos, len);
672 }
673 }
674 }
675
validatedTelephoneUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)676 void Scanner::validatedTelephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
677 const size_t pos = hit.Start + 1;
678 const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
679 if (valid_phone_utf16le(sp.sbuf, pos, len)){
680 Telephone_Recorder->write_buf(sp.sbuf, pos, len);
681 }
682 }
683
bitlockerHitHandler(const LG_SearchHit & hit,const scanner_params & sp)684 void Scanner::bitlockerHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
685 Alert_Recorder->write(sp.sbuf.pos0 + hit.Start + 1, reinterpret_cast<const char*>(sp.sbuf.buf) + 1, "Possible BitLocker Recovery Key (ASCII).");
686 }
687
bitlockerUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)688 void Scanner::bitlockerUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
689 const size_t pos = hit.Start + (*(sp.sbuf.buf + hit.Start + 1) == '\0' ? 2 : 1);
690 const size_t len = (hit.End - 1) - pos;
691
692 Alert_Recorder->write(sp.sbuf.pos0 + pos, low_utf16le_to_ascii(sp.sbuf.buf + pos, len), "Possible BitLocker Recovery Key (UTF-16).");
693 }
694
piiHitHandler(const LG_SearchHit & hit,const scanner_params & sp)695 void Scanner::piiHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
696 PII_Recorder->write_buf(
697 sp.sbuf, hit.Start,
698 hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
699 );
700 }
701
piiUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)702 void Scanner::piiUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
703 PII_Recorder->write_buf(
704 sp.sbuf, hit.Start,
705 hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
706 );
707 }
708
sinHitHandler(const LG_SearchHit & hit,const scanner_params & sp)709 void Scanner::sinHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
710 SIN_Recorder->write_buf(
711 sp.sbuf, hit.Start,
712 hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
713 );
714 }
715
sinUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)716 void Scanner::sinUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
717 SIN_Recorder->write_buf(
718 sp.sbuf, hit.Start,
719 hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
720 );
721 }
722
sinHitHandler2(const LG_SearchHit & hit,const scanner_params & sp)723 void Scanner::sinHitHandler2(const LG_SearchHit& hit, const scanner_params& sp) {
724 SIN_Recorder->write_buf(
725 sp.sbuf, hit.Start+1,
726 hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
727 );
728 }
729
sinUTF16LEHitHandler2(const LG_SearchHit & hit,const scanner_params & sp)730 void Scanner::sinUTF16LEHitHandler2(const LG_SearchHit& hit, const scanner_params& sp) {
731 SIN_Recorder->write_buf(
732 sp.sbuf, hit.Start+1,
733 hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
734 );
735 }
736
dateHitHandler(const LG_SearchHit & hit,const scanner_params & sp)737 void Scanner::dateHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
738 PII_Recorder->write_buf(sp.sbuf, hit.Start, hit.End - hit.Start);
739 }
740
741 Scanner TheScanner;
742 }
743
744 extern "C"
scan_accts_lg(struct scanner_params & sp)745 void scan_accts_lg(struct scanner_params &sp) {
746 scan_lg(accts::TheScanner, sp, rcb);
747 }
748
749 #endif // HAVE_LIBLIGHTGREP
750