1 #include "config.h"
2 
3 // if liblightgrep isn't present, compiles to nothing
4 #ifdef HAVE_LIBLIGHTGREP
5 
6 #include <algorithm>
7 #include <string>
8 
9 #include "be13_api/scanner_params.h"
10 #include "histogram.h"
11 #include "scan_ccns2.h"
12 #include "pattern_scanner.h"
13 #include "pattern_scanner_utils.h"
14 
15 namespace accts {
  // Names of the encodings used when registering patterns below.
  const char* const DefaultEncodingsCStrings[] = {"UTF-8", "UTF-16LE"};

  // Both encodings as a vector; the end pointer is derived from the array's
  // element count so the list stays in sync with DefaultEncodingsCStrings.
  const vector<string> DefaultEncodings(
    DefaultEncodingsCStrings,
    DefaultEncodingsCStrings +
      sizeof(DefaultEncodingsCStrings)/sizeof(DefaultEncodingsCStrings[0])
  );

  // Single-entry encoding list for patterns registered as UTF-8 only.
  const vector<string> OnlyUTF8Encoding(1, "UTF-8");

  // Single-entry encoding list for patterns registered as UTF-16LE only.
  const vector<string> OnlyUTF16LEEncoding(1, "UTF-16LE");

  // Key options shared by every pattern registered in Scanner::init().
  const LG_KeyOptions DefaultOptions = { 0, 1 }; // patterns, case-insensitive
29 
30   //
31   // helper functions
32   //
33 
is_pdf_box(const sbuf_t & sbuf,size_t pos)34   bool is_pdf_box(const sbuf_t& sbuf, size_t pos) {
35     const char box[] = "Box";
36     const size_t c0 = pos >= 10 ? pos - 10 : 10 - pos - 1;
37     const uint8_t* i = search(sbuf.buf + c0, sbuf.buf + pos, box, box + strlen(box));
38     return i != sbuf.buf + pos;
39 /*
40     return i != sbuf.buf + pos && (
41       (i + 2 < sbuf.buf + pos && *(i+1) == ' ' && *(i+2) == '[')
42       || *(i+1) == '['
43     );
44 */
45   }
46 
valid_char(char ch)47   inline bool valid_char(char ch) {
48     return isdigit(ch) || isspace(ch) || ch=='[' || ch==']' ||
49            ch=='<' || ch=='Z' || ch=='.' || ch=='l' || ch=='j';
50   }
51 
valid_phone_utf16le(const sbuf_t & sbuf,size_t pos,size_t len)52   bool valid_phone_utf16le(const sbuf_t& sbuf, size_t pos, size_t len) {
53     // We want invalid characters before and after (assuming there is a
54     // before and after)
55     bool invalid_before = false;
56     bool invalid_after = false;
57 
58     if (pos > 16) {
59       for (size_t i = pos-16; i < pos; ++i) {
60         if (sbuf[i] != '\0' && !valid_char(sbuf[i])) {
61           invalid_before = true;
62           break;
63         }
64       }
65     }
66     else {
67       invalid_before = true;
68     }
69 
70     if (sbuf.bufsize < pos+len+16) {
71       for (size_t i = pos+len; i < pos+len+16; ++i) {
72         if (sbuf[i] != '\0' && !valid_char(sbuf[i])) {
73           invalid_after = true;
74           break;
75         }
76       }
77     }
78     else {
79       invalid_after = true;
80     }
81 
82     /*
83      * 2013-05-28: if followed by ' #{1,5} ' then it's not a phone either!
84      */
85     if (pos+len+10 < sbuf.bufsize) {
86       if (sbuf[pos+len] == ' ' && sbuf[pos+len+1] == '\0' &&
87           isdigit(sbuf[pos+len+2]) && sbuf[pos+len+3] == '\0') {
88         for (size_t i = pos+len+2; i+3 < sbuf.bufsize && i < pos+len+16; i += 2) {
89           if (isdigit(sbuf[i]) && sbuf[i+1] == '\0' &&
90               sbuf[i+2] == ' ' && sbuf[i+3] == '\0') {
91             return false; // not valid
92           }
93         }
94       }
95     }
96 
97     /* If it is followed by a dash and a number, it's not a phone number */
98     if (pos+len+4 < sbuf.bufsize) {
99       if (sbuf[pos+len] == '-' && sbuf[pos+len+1] == '\0' &&
100           isdigit(sbuf[pos+len+2] && sbuf[pos+len+3] == '\0')) {
101         return false;
102       }
103     }
104 
105     return invalid_before && invalid_after;
106   }
107 
108   //
109   // subpatterns
110   //
111 
112 //  const string END("([^0-9e.]|(\\.[^0-9]))");
  // Trailing context: one byte that is not '.', a digit, 'E' or 'e'; or a
  // '.' followed by a byte that is not a digit.
  const string END("([^\\z2E\\z30-\\z39\\z45\\z65]|(\\.[^\\z30-\\z39]))");
  // Exactly four digits.
  const string BLOCK("[0-9]{4}");
  // Delimiter between digit blocks: dash or space.
  const string DELIM("[- ]");
  // A four-digit block followed by a delimiter.
  const string DB("(" + BLOCK + DELIM + ")");
  // Like DB, but the block must start with 4 or 5.
  const string SDB("([45][0-9]{3}" + DELIM + ")");
  // Telephone delimiter: space, slash, dot or dash.
  const string TDEL("[ /.-]");

  // Leading context for the prefixed-phone patterns: a byte that is not an
  // ASCII letter (UTF-8), or the UTF-16LE form: such a byte followed by a
  // zero byte, or a single non-zero byte.
  const string PHONETEXT_UTF8_CTX("[^\\z41-\\z5A\\z61-\\z7A]");
  const string PHONETEXT_UTF16LE_CTX("([^\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])");
  // Words that introduce a phone number, with an optional trailing colon.
  const string PHONETEXT_COMMON("(tel[.ephon]*|fax|facsimile|DSN|telex|TTD|mobile|cell):?");
  const string PHONETEXT_UTF8("(" + PHONETEXT_UTF8_CTX + PHONETEXT_COMMON + ")");
  const string PHONETEXT_UTF16LE("(" + PHONETEXT_UTF16LE_CTX + PHONETEXT_COMMON + ")");

  // Four-digit year, 1900 through 2019.
  const string YEAR("(19[0-9][0-9]|20[01][0-9])");
  // Month as an English name or abbreviation, or as a number 1-12 with an
  // optional leading zero.
  const string MONTH("(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?|0?[1-9]|1[0-2])");
  // Day of month: one or two digits up to 29, or 30 / 31.
  const string DAY("([0-2]?[0-9]|3[01])");
129 
130   const string SYEAR("([0-9][0-9])");
131   const string SMONTH("([01][0-2])");
132 
  // YEAR-MONTH-DAY
  const string DATEA("(" + YEAR + "-" + MONTH + "-" + DAY + ")");
  // YEAR/MONTH/DAY
  const string DATEB("(" + YEAR + "/" + MONTH + "/" + DAY + ")");
  // DAY MONTH YEAR, separated by single spaces
  const string DATEC("(" + DAY + " " + MONTH + " " + YEAR + ")");
  // MONTH DAY, YEAR (one or more commas/spaces before the year)
  const string DATED("(" + MONTH + " " + DAY + "[, ]+" + YEAR + ")");

  // Any of the four date layouts above.
  const string DATEFORMAT("(" + DATEA + "|" + DATEB + "|" + DATEC + "|" + DATED + ")");
139 
140   //
  // the scanner
142   //
143 
144   class Scanner: public PatternScanner {
145   public:
Scanner()146     Scanner(): PatternScanner("accts_lg"), CCN_Recorder(0), CCN_Track2_Recorder(0), Telephone_Recorder(0), Alert_Recorder(0), PII_Recorder(0), SIN_Recorder(0) {}
~Scanner()147     virtual ~Scanner() {}
148 
clone() const149     virtual Scanner* clone() const { return new Scanner(*this); }
150 
151     virtual void startup(const scanner_params& sp);
152     virtual void init(const scanner_params& sp);
153     virtual void initScan(const scanner_params&);
154 
155     feature_recorder* CCN_Recorder;
156     feature_recorder* CCN_Track2_Recorder;
157     feature_recorder* Telephone_Recorder;
158     feature_recorder* Alert_Recorder;
159     feature_recorder* PII_Recorder;
160     feature_recorder* SIN_Recorder;
161 
162     void ccnHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
163 
164     void ccnUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
165 
166     void ccnTrack2HitHandler(const LG_SearchHit& hit, const scanner_params& sp);
167 
168     void ccnTrack2UTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
169 
170     void telephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
171 
172     void telephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
173 
174     void telephoneTrailingCtxHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
175 
176     void telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
177 
178     void validatedTelephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
179 
180     void validatedTelephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
181 
182     void bitlockerHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
183 
184     void bitlockerUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
185 
186     void piiHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
187 
188     void piiUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
189 
190     void sinHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
191 
192     void sinUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
193 
194     void sinHitHandler2(const LG_SearchHit& hit, const scanner_params& sp);
195 
196     void sinUTF16LEHitHandler2(const LG_SearchHit& hit, const scanner_params& sp);
197 
198     void dateHitHandler(const LG_SearchHit& hit, const scanner_params& sp);
199 
200   private:
Scanner(const Scanner & s)201     Scanner(const Scanner& s):
202       PatternScanner(s),
203       CCN_Recorder(s.CCN_Recorder),
204       CCN_Track2_Recorder(s.CCN_Track2_Recorder),
205       Telephone_Recorder(s.Telephone_Recorder),
206       Alert_Recorder(s.Alert_Recorder),
207       PII_Recorder(s.PII_Recorder),
208       SIN_Recorder(s.SIN_Recorder)
209     {}
210 
211     Scanner& operator=(const Scanner&);
212   };
213 
startup(const scanner_params & sp)214   void Scanner::startup(const scanner_params& sp) {
215       sp.check_version();
216 
217     sp.info->name            = "accts_lg";
218     sp.info->author          = "Simson L. Garfinkel, modified by Tim Walsh";
219     sp.info->description     = "scans for CCNs, track 2, PII (including SSN and Canadian SIN), and phone #s";
220     sp.info->scanner_version = "1.0";
221 
222     // define the feature files this scanner creates
223     sp.info->feature_names.insert("ccn");
224     sp.info->feature_names.insert("pii");  // personally identifiable information
225     sp.info->feature_names.insert("sin");  // canadian social insurance number
226     sp.info->feature_names.insert("ccn_track2");
227     sp.info->feature_names.insert("telephone");
228     sp.info->histogram_defs.insert(histogram_def("ccn", "", "histogram"));
229     sp.info->histogram_defs.insert(histogram_def("ccn_track2", "", "histogram"));
230 
231     // define the histograms to make
232     sp.info->histogram_defs.insert(
233       histogram_def("telephone", "", "histogram", HistogramMaker::FLAG_NUMERIC)
234     );
235 
236     scan_ccns2_debug = sp.info->config->debug;           // get debug value
237   }
238 
init(const scanner_params & sp)239   void Scanner::init(const scanner_params& sp) {
240     //
241     // patterns
242     //
243 
244     // FIXME: leading context
245     // FIXME: trailing context
246     /* #### #### #### #### --- most credit card numbers*/
247     const string REGEX2("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]" + SDB + DB + DB + BLOCK + END);
248 
249     new Handler(
250       *this,
251       REGEX2,
252       OnlyUTF8Encoding,
253       DefaultOptions,
254       &Scanner::ccnHitHandler
255     );
256 
257     const string REGEX2_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])" + SDB + DB + DB + BLOCK + END);
258 
259     new Handler(
260       *this,
261       REGEX2_UTF16LE,
262       OnlyUTF16LEEncoding,
263       DefaultOptions,
264       &Scanner::ccnUTF16LEHitHandler
265     );
266 
267     // FIXME: leading context
268     // FIXME: trailing context
269     /* 3### ###### ######### --- 15 digits beginning with 3 and funny space. */
270     /* Must be american express... */
271     const string REGEX3("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]3[0-9]{3}" + DELIM + "[0-9]{6}" + DELIM + "[0-9]{5}" + END);
272 
273     new Handler(
274       *this,
275       REGEX3,
276       OnlyUTF8Encoding,
277       DefaultOptions,
278       &Scanner::ccnHitHandler
279     );
280 
281     const string REGEX3_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])3[0-9]{3}" + DELIM + "[0-9]{6}" + DELIM + "[0-9]{5}" + END);
282 
283     new Handler(
284       *this,
285       REGEX3_UTF16LE,
286       OnlyUTF16LEEncoding,
287       DefaultOptions,
288       &Scanner::ccnUTF16LEHitHandler
289     );
290 
291     // FIXME: leading context
292     // FIXME: trailing context
293     /* 3### ###### ######### --- 15 digits beginning with 3 and funny space. */
294     /* Must be american express... */
295     const string REGEX4("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]3[0-9]{14}" + END);
296 
297     new Handler(
298       *this,
299       REGEX4,
300       OnlyUTF8Encoding,
301       DefaultOptions,
302       &Scanner::ccnHitHandler
303     );
304 
305     const string REGEX4_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])3[0-9]{14}" + END);
306 
307     new Handler(
308       *this,
309       REGEX4_UTF16LE,
310       OnlyUTF16LEEncoding,
311       DefaultOptions,
312       &Scanner::ccnUTF16LEHitHandler
313     );
314 
315     // FIXME: leading context
316     // FIXME: trailing context
317     /* ###############  13-19 numbers as a block beginning with a 4 or 5
318      * followed by something that is not a digit.
319      * Yes, CCNs can now be up to 19 digits long.
320      * http://www.creditcards.com/credit-card-news/credit-card-appearance-1268.php
321      */
322     const string REGEX5("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E][4-6][0-9]{15,18}" + END);
323 
324     new Handler(
325       *this,
326       REGEX5,
327       OnlyUTF8Encoding,
328       DefaultOptions,
329       &Scanner::ccnHitHandler
330     );
331 
332     const string REGEX5_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A\\z2E]\\z00|[^\\z00])[4-6][0-9]{15,18}" + END);
333 
334     new Handler(
335       *this,
336       REGEX5_UTF16LE,
337       OnlyUTF16LEEncoding,
338       DefaultOptions,
339       &Scanner::ccnUTF16LEHitHandler
340     );
341 
342     // FIXME: leading context
343     /* ;###############=YYMM101#+? --- track2 credit card data */
344     /* {SYEAR}{SMONTH} */
345     /* ;CCN=05061010000000000738? */
346     const string REGEX6("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A][4-6][0-9]{15,18}=" + SYEAR + SMONTH + "101[0-9]{13}");
347 
348     new Handler(
349       *this,
350       REGEX6,
351       OnlyUTF8Encoding,
352       DefaultOptions,
353       &Scanner::ccnTrack2HitHandler
354     );
355 
356     const string REGEX6_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])[4-6][0-9]{15,18}=" + SYEAR + SMONTH + "101[0-9]{13}");
357 
358     new Handler(
359       *this,
360       REGEX6_UTF16LE,
361       OnlyUTF16LEEncoding,
362       DefaultOptions,
363       &Scanner::ccnTrack2UTF16LEHitHandler
364     );
365 
366     // FIXME: trailing context
367     // FIXME: leading context
368     /* US phone numbers without area code in parens */
369     /* New addition: If proceeded by " ####? ####? "
370      * then do not consider this a phone number. We see a lot of that stuff in
371      * PDF files.
372      */
373     const string REGEX7("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]([0-9]{3}" + TDEL + "){2}[0-9]{4}" + END);
374 
375     new Handler(
376       *this,
377       REGEX7,
378       OnlyUTF8Encoding,
379       DefaultOptions,
380       &Scanner::validatedTelephoneHitHandler
381     );
382 
383     const string REGEX7_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])([0-9]{3}" + TDEL + "){2}[0-9]{4}" + END);
384 
385     new Handler(
386       *this,
387       REGEX7,
388       OnlyUTF16LEEncoding,
389       DefaultOptions,
390       &Scanner::validatedTelephoneUTF16LEHitHandler
391     );
392 
393     // FIXME: trailing context
394     // FIXME: leading context
395     /* US phone number with parens, like (215) 555-1212 */
396     const string REGEX8("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\([0-9]{3}\\)" + TDEL + "?[0-9]{3}" + TDEL + "[0-9]{4}" + END);
397 
398     new Handler(
399       *this,
400       REGEX8,
401       OnlyUTF8Encoding,
402       DefaultOptions,
403       &Scanner::telephoneTrailingCtxHitHandler
404     );
405 
406     const string REGEX8_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])\\([0-9]{3}\\)" + TDEL + "?[0-9]{3}" + TDEL + "[0-9]{4}" + END);
407 
408     new Handler(
409       *this,
410       REGEX8_UTF16LE,
411       OnlyUTF16LEEncoding,
412       DefaultOptions,
413       &Scanner::telephoneTrailingCtxUTF16LEHitHandler
414     );
415 
416     // FIXME: trailing context
417     // FIXME: leading context
418     /* Generalized international phone numbers */
419     const string REGEX9("[^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\+[0-9]{1,3}(" + TDEL + "[0-9]{2,3}){2,6}[0-9]{2,4}" + END);
420 
421     new Handler(
422       *this,
423       REGEX9,
424       OnlyUTF8Encoding,
425       DefaultOptions,
426       &Scanner::validatedTelephoneHitHandler
427     );
428 
429     const string REGEX9_UTF16LE("([^\\z30-\\z39\\z41-\\z5A\\z61-\\z7A]\\z00|[^\\z00])\\+[0-9]{1,3}(" + TDEL + "[0-9]{2,3}){2,6}[0-9]{2,4}" + END);
430 
431     new Handler(
432       *this,
433       REGEX9,
434       OnlyUTF16LEEncoding,
435       DefaultOptions,
436       &Scanner::validatedTelephoneHitHandler
437     );
438 
439     // FIXME: leading context
440     /* Generalized number with prefix */
441     const string REGEX10(PHONETEXT_UTF8 + "[0-9/ .+]{7,18}");
442 
443     new Handler(
444       *this,
445       REGEX10,
446       OnlyUTF8Encoding,
447       DefaultOptions,
448       &Scanner::telephoneHitHandler
449     );
450 
451     const string REGEX10_UTF16LE(PHONETEXT_UTF16LE + "[0-9/ .+]{7,18}");
452 
453     new Handler(
454       *this,
455       REGEX10_UTF16LE,
456       OnlyUTF16LEEncoding,
457       DefaultOptions,
458       &Scanner::telephoneUTF16LEHitHandler
459     );
460 
461     // FIXME: leading context
462     /* Generalized number with city code and prefix */
463     const string REGEX11(PHONETEXT_UTF8 + "[0-9 +]+ ?\\([0-9]{2,4}\\) ?[\\-0-9]{4,8}");
464 
465     new Handler(
466       *this,
467       REGEX11,
468       OnlyUTF8Encoding,
469       DefaultOptions,
470       &Scanner::telephoneHitHandler
471     );
472 
473    const string REGEX11_UTF16LE(PHONETEXT_UTF16LE + "[0-9 +]+ ?\\([0-9]{2,4}\\) ?[\\-0-9]{4,8}");
474 
475     new Handler(
476       *this,
477       REGEX11_UTF16LE,
478       OnlyUTF16LEEncoding,
479       DefaultOptions,
480       &Scanner::telephoneUTF16LEHitHandler
481     );
482 
483     // FIXME: trailing context
484     /* Generalized international phone numbers */
485     const string REGEX12("fedex[^a-z]+([0-9]{4}[- ]?){2}[0-9]" + END);
486 
487     new Handler(
488       *this,
489       REGEX12,
490       OnlyUTF8Encoding,
491       DefaultOptions,
492       &Scanner::piiHitHandler
493     );
494 
495     new Handler(
496       *this,
497       REGEX12,
498       OnlyUTF16LEEncoding,
499       DefaultOptions,
500       &Scanner::piiUTF16LEHitHandler
501     );
502 
503     // FIXME: trailing context
504     const string REGEX13("ssn:?[ \\t]+[0-9]{3}-?[0-9]{2}-?[0-9]{4}" + END);
505 
506     new Handler(
507       *this,
508       REGEX13,
509       OnlyUTF8Encoding,
510       DefaultOptions,
511       &Scanner::piiHitHandler
512     );
513 
514     new Handler(
515       *this,
516       REGEX13,
517       OnlyUTF16LEEncoding,
518       DefaultOptions,
519       &Scanner::piiUTF16LEHitHandler
520     );
521 
522     const string REGEX14("dob:?[ \\t]+" + DATEFORMAT);
523 
524     new Handler(
525       *this,
526       REGEX14,
527       DefaultEncodings,
528       DefaultOptions,
529       &Scanner::dateHitHandler
530     );
531 
532     // FIXME: trailing context
533     const string REGEX15("sin:?[ \\t]+[0-9]{3}[ -]?[0-9]{3}[ -]?[0-9]{3}" + END);
534 
535     new Handler(
536       *this,
537       REGEX15,
538       OnlyUTF8Encoding,
539       DefaultOptions,
540       &Scanner::sinHitHandler
541     );
542 
543     new Handler(
544       *this,
545       REGEX15,
546       OnlyUTF16LEEncoding,
547       DefaultOptions,
548       &Scanner::sinUTF16LEHitHandler
549     );
550 
551     const string REGEX16("[^0-9][0-9]{3}-[0-9]{3}-[0-9]{3}" + END);
552 
553     new Handler(
554       *this,
555       REGEX16,
556       OnlyUTF8Encoding,
557       DefaultOptions,
558       &Scanner::sinHitHandler2
559     );
560 
561     new Handler(
562       *this,
563       REGEX16,
564       OnlyUTF16LEEncoding,
565       DefaultOptions,
566       &Scanner::sinUTF16LEHitHandler2
567     );
568 
569     // FIXME: leading context
570     // FIXME: trailing context
571     /* Possible BitLocker Recovery Key. */
572     const string BITLOCKER("[^\\z30-\\z39]([0-9]{6}-){7}[0-9]{6}[^\\z30-\\z39]");
573 
574     new Handler(
575       *this,
576       BITLOCKER,
577       OnlyUTF8Encoding,
578       DefaultOptions,
579       &Scanner::bitlockerHitHandler
580     );
581 
582     const string BITLOCKER_UTF16LE("([^\\z30-\\z39]\\z00|[^\\z00])([0-9]{6}-){7}[0-9]{6}[^\\z30-\\z39]");
583 
584     new Handler(
585       *this,
586       BITLOCKER,
587       OnlyUTF16LEEncoding,
588       DefaultOptions,
589       &Scanner::bitlockerUTF16LEHitHandler
590     );
591   }
592 
initScan(const scanner_params & sp)593   void Scanner::initScan(const scanner_params& sp) {
594     CCN_Recorder = sp.fs.named_feature_recorder("ccn");
595     CCN_Track2_Recorder = sp.fs.named_feature_recorder("ccn_track2");
596     Telephone_Recorder = sp.fs.named_feature_recorder("telephone");
597     Alert_Recorder = sp.fs.get_alert_recorder();
598     PII_Recorder = sp.fs.named_feature_recorder("pii");
599     SIN_Recorder = sp.fs.named_feature_recorder("sin");
600   }
601 
ccnHitHandler(const LG_SearchHit & hit,const scanner_params & sp)602   void Scanner::ccnHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
603     const size_t pos = hit.Start + 1;
604     const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
605 
606     if (valid_ccn(reinterpret_cast<const char*>(sp.sbuf.buf)+pos, len)) {
607       CCN_Recorder->write_buf(sp.sbuf, pos, len);
608     }
609   }
610 
ccnUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)611   void Scanner::ccnUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
612     const size_t pos = hit.Start + (*(sp.sbuf.buf+hit.Start+1) == '\0' ? 2 : 1);    const size_t len = hit.End - pos;
613 
614     const string ascii(low_utf16le_to_ascii(sp.sbuf.buf+pos, len));
615     if (valid_ccn(ascii.c_str(), ascii.size())) {
616       CCN_Recorder->write_buf(sp.sbuf, pos, len);
617     }
618   }
619 
ccnTrack2HitHandler(const LG_SearchHit & hit,const scanner_params & sp)620   void Scanner::ccnTrack2HitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
621     const size_t pos = hit.Start + 1;
622     const size_t len = hit.End - pos;
623 
624     if (valid_ccn(reinterpret_cast<const char*>(sp.sbuf.buf)+pos, len)) {
625       CCN_Recorder->write_buf(sp.sbuf, pos, len);
626     }
627   }
628 
ccnTrack2UTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)629   void Scanner::ccnTrack2UTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
630     const size_t pos = hit.Start + (*(sp.sbuf.buf+hit.Start+1) == '\0' ? 2 : 1);
631     const size_t len = hit.End - pos;
632 
633     const string ascii(low_utf16le_to_ascii(sp.sbuf.buf+pos, len));
634     if (valid_ccn(ascii.c_str(), ascii.size())) {
635       CCN_Recorder->write_buf(sp.sbuf, pos, len);
636     }
637   }
638 
telephoneHitHandler(const LG_SearchHit & hit,const scanner_params & sp)639   void Scanner::telephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
640     Telephone_Recorder->write_buf(sp.sbuf, hit.Start+1, hit.End-hit.Start-1);
641   }
642 
telephoneUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)643   void Scanner::telephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
644     const size_t start = hit.Start + (*(sp.sbuf.buf + hit.Start + 1) == '\0' ? 2 : 1);
645     const size_t len = hit.End - start;
646 
647     Telephone_Recorder->write_buf(sp.sbuf, start, len);
648   }
649 
telephoneTrailingCtxHitHandler(const LG_SearchHit & hit,const scanner_params & sp)650   void Scanner::telephoneTrailingCtxHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
651     Telephone_Recorder->write_buf(
652       sp.sbuf,
653       hit.Start+1,
654       hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - (hit.Start+1)
655     );
656   }
657 
telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)658   void Scanner::telephoneTrailingCtxUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
659     Telephone_Recorder->write_buf(
660       sp.sbuf,
661       hit.Start+1,
662       hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) -(hit.Start+1)
663     );
664   }
665 
validatedTelephoneHitHandler(const LG_SearchHit & hit,const scanner_params & sp)666   void Scanner::validatedTelephoneHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
667     const size_t pos = hit.Start + 1;
668     const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
669     if (valid_phone(sp.sbuf, pos, len)){
670       if (!is_pdf_box(sp.sbuf, pos)) {
671         Telephone_Recorder->write_buf(sp.sbuf, pos, len);
672       }
673     }
674   }
675 
validatedTelephoneUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)676   void Scanner::validatedTelephoneUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
677     const size_t pos = hit.Start + 1;
678     const size_t len = hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - pos;
679     if (valid_phone_utf16le(sp.sbuf, pos, len)){
680       Telephone_Recorder->write_buf(sp.sbuf, pos, len);
681     }
682   }
683 
bitlockerHitHandler(const LG_SearchHit & hit,const scanner_params & sp)684   void Scanner::bitlockerHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
685     Alert_Recorder->write(sp.sbuf.pos0 + hit.Start + 1, reinterpret_cast<const char*>(sp.sbuf.buf) + 1, "Possible BitLocker Recovery Key (ASCII).");
686   }
687 
bitlockerUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)688   void Scanner::bitlockerUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
689     const size_t pos = hit.Start + (*(sp.sbuf.buf + hit.Start + 1) == '\0' ? 2 : 1);
690     const size_t len = (hit.End - 1) - pos;
691 
692     Alert_Recorder->write(sp.sbuf.pos0 + pos, low_utf16le_to_ascii(sp.sbuf.buf + pos, len), "Possible BitLocker Recovery Key (UTF-16).");
693   }
694 
piiHitHandler(const LG_SearchHit & hit,const scanner_params & sp)695   void Scanner::piiHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
696     PII_Recorder->write_buf(
697       sp.sbuf, hit.Start,
698       hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
699     );
700   }
701 
piiUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)702   void Scanner::piiUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
703     PII_Recorder->write_buf(
704       sp.sbuf, hit.Start,
705       hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
706     );
707   }
708 
sinHitHandler(const LG_SearchHit & hit,const scanner_params & sp)709   void Scanner::sinHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
710     SIN_Recorder->write_buf(
711       sp.sbuf, hit.Start,
712       hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
713     );
714   }
715 
sinUTF16LEHitHandler(const LG_SearchHit & hit,const scanner_params & sp)716   void Scanner::sinUTF16LEHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
717     SIN_Recorder->write_buf(
718       sp.sbuf, hit.Start,
719       hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
720     );
721   }
722 
sinHitHandler2(const LG_SearchHit & hit,const scanner_params & sp)723   void Scanner::sinHitHandler2(const LG_SearchHit& hit, const scanner_params& sp) {
724     SIN_Recorder->write_buf(
725       sp.sbuf, hit.Start+1,
726       hit.End - (*(sp.sbuf.buf+hit.End-2) == '.' ? 2 : 1) - hit.Start
727     );
728   }
729 
sinUTF16LEHitHandler2(const LG_SearchHit & hit,const scanner_params & sp)730   void Scanner::sinUTF16LEHitHandler2(const LG_SearchHit& hit, const scanner_params& sp) {
731     SIN_Recorder->write_buf(
732       sp.sbuf, hit.Start+1,
733       hit.End - (*(sp.sbuf.buf+hit.End-3) == '.' ? 3 : 1) - hit.Start
734     );
735   }
736 
dateHitHandler(const LG_SearchHit & hit,const scanner_params & sp)737   void Scanner::dateHitHandler(const LG_SearchHit& hit, const scanner_params& sp) {
738     PII_Recorder->write_buf(sp.sbuf, hit.Start, hit.End - hit.Start);
739   }
740 
  // The module's scanner instance; scan_accts_lg() hands it to scan_lg().
  Scanner TheScanner;
742 }
743 
// C-linkage entry point of the accts_lg scanner: forwards the scanner
// parameters, together with the module's scanner instance, to scan_lg().
// NOTE(review): `rcb` is not a parameter of this function and is not
// declared anywhere in this file; confirm where it is expected to come from
// (or whether the call or the signature is out of date).
extern "C"
void scan_accts_lg(struct scanner_params &sp) {
  scan_lg(accts::TheScanner, sp, rcb);
}
748 
749 #endif // HAVE_LIBLIGHTGREP
750