1 /*
2
3 File: file_txt.c
4
5 Copyright (C) 2005-2012 Christophe GRENIER <grenier@cgsecurity.org>
6
7 This software is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write the Free Software Foundation, Inc., 51
19 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 #ifdef HAVE_STDLIB_H
27 #include <stdlib.h>
28 #endif
29 #ifdef HAVE_STRING_H
30 #include <string.h>
31 #endif
32 #ifdef HAVE_TIME_H
33 #include <time.h>
34 #endif
35 #include <ctype.h> /* tolower */
36 #include <stdio.h>
37 #include "types.h"
38 #include "common.h"
39 #include "filegen.h"
40 #include "log.h"
41 #include "memmem.h"
42 #include "file_txt.h"
43
44 extern const file_hint_t file_hint_doc;
45 extern const file_hint_t file_hint_jpg;
46 extern const file_hint_t file_hint_pdf;
47 extern const file_hint_t file_hint_sld;
48 extern const file_hint_t file_hint_tiff;
49 extern const file_hint_t file_hint_zip;
50
51 static inline int filtre(unsigned int car);
52
53 static void register_header_check_txt(file_stat_t *file_stat);
54 static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
55 static void register_header_check_fasttxt(file_stat_t *file_stat);
56 static void register_header_check_snz(file_stat_t *file_stat);
57 static int header_check_fasttxt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
58 #ifdef UTF16
59 static int header_check_le16_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
60 #endif
61
62 const file_hint_t file_hint_snz= {
63 .extension="snz",
64 .description="Olfaction SeeNez odorama",
65 .max_filesize=PHOTOREC_MAX_FILE_SIZE,
66 .recover=1,
67 .enable_by_default=1,
68 .register_header_check=®ister_header_check_snz
69 };
70
71 const file_hint_t file_hint_fasttxt= {
72 .extension="tx?",
73 .description="Text files with header: rtf,xml,xhtml,mbox/imm,pm,ram,reg,sh,slk,stp,jad,url",
74 .max_filesize=PHOTOREC_MAX_FILE_SIZE,
75 .recover=1,
76 .enable_by_default=1,
77 .register_header_check=®ister_header_check_fasttxt
78 };
79
80 const file_hint_t file_hint_txt= {
81 .extension="txt",
82 .description="Other text files: txt,html,asp,bat,C,jsp,perl,php,py/emlx... scripts",
83 .max_filesize=PHOTOREC_MAX_FILE_SIZE,
84 .recover=1,
85 .enable_by_default=1,
86 .register_header_check=®ister_header_check_txt
87 };
88
89 static unsigned char ascii_char[256];
90
register_header_check_txt(file_stat_t * file_stat)91 static void register_header_check_txt(file_stat_t *file_stat)
92 {
93 unsigned int i;
94 for(i=0; i<256; i++)
95 ascii_char[i]=i;
96 for(i=0; i<256; i++)
97 {
98 if(filtre(i) || i==0xE2 || i==0xC2 || i==0xC3 || i==0xC5 || i==0xC6 || i==0xCB)
99 register_header_check(0, &ascii_char[i], 1, &header_check_txt, file_stat);
100 }
101 #ifdef UTF16
102 register_header_check(1, &ascii_char[0], 1, &header_check_le16_txt, file_stat);
103 #endif
104 }
105
/* One fixed text signature: the bytes expected at the start of the file,
 * how many of them there are, and the extension given to matching files. */
typedef struct
{
  const char *string;
  const unsigned int len;
  const char *extension;
} txt_header_t;

/* Build a table entry; the signature length is taken from the literal itself */
#define TXT_HEADER(sig, ext) { sig, (unsigned int)(sizeof(sig)-1), ext }

/* DJGPP builds use shortened extensions */
#ifdef DJGPP
#define TXT_EXT_EMKA "emk"
#define TXT_EXT_SESSIONSTORE "js"
#else
#define TXT_EXT_EMKA "emka"
#define TXT_EXT_SESSIONSTORE "sessionstore.js"
#endif

/* Known text headers and the extension each one maps to.
 * The list ends with an all-NULL/0 entry. */
static const txt_header_t fasttxt_headers[] = {
  /* Unix shell scripts */
  TXT_HEADER("#!/bin/bash", "sh"),
  TXT_HEADER("#!/bin/ksh", "sh"),
  TXT_HEADER("#!/bin/sh", "sh"),
  TXT_HEADER("#! /bin/bash", "sh"),
  TXT_HEADER("#! /bin/ksh", "sh"),
  TXT_HEADER("#! /bin/sh", "sh"),
  /* Opera Hotlist bookmark/contact list/notes */
  TXT_HEADER("Opera Hotlist version 2.0", "adr"),
  /* Microsoft VB Class module */
  TXT_HEADER("VERSION 1.0 CLASS\r\nBEGIN", "cls"),
  /* Cue sheets often begin with the music genre or with the filename
   * http://wiki.hydrogenaudio.org/index.php?title=Cue_sheet */
  TXT_HEADER("REM GENRE ", "cue"),
  TXT_HEADER("FILE \"", "cue"),
  /* Lotus Data Interchange Format */
  TXT_HEADER("TABLE\r\n0,1\r\n", "dif"),
  /* Designer, a Photobook Designer Software */
  TXT_HEADER("vSg4q7j8GLrtf", "dp"),
  TXT_HEADER("-----BEGIN DSA PRIVATE KEY-----", "dsa"),
  /* EMKA IOX file */
  TXT_HEADER("1\t\t\t\t\tthis file\t", TXT_EXT_EMKA),
  /* Source code in the Go language */
  TXT_HEADER("package main", "go"),
  /* ENVI */
  TXT_HEADER("ENVI\r\ndescription", "hdr"),
  /* Java Application Descriptor
   * http://en.wikipedia.org/wiki/JAD_%28file_format%29 */
  TXT_HEADER("MIDlet-1:", "jad"),
  TXT_HEADER("{\"title\":\"\",\"id\":1,\"dateAdded\":", "json"),
  TXT_HEADER("-----BEGIN RSA PRIVATE KEY-----", "key"),
  /* LyX http://www.lyx.org */
  TXT_HEADER("#LyX 1.", "lyx"),
  TXT_HEADER("#LyX 2.", "lyx"),
  /* LilyPond http://lilypond.org */
  TXT_HEADER("\n\\version \"", "ly"),
  /* Moving Picture Experts Group Audio Layer 3 Uniform Resource Locator */
  TXT_HEADER("#EXTM3U", "m3u"),
  /* Mnemosyne, http://www.mnemosyne-proj.org/
   * flash-card program to help memorise question/answer pairs */
  TXT_HEADER("--- Mnemosyne Data Base --- Format Version 2 ---", "mem"),
  /* Mozilla, Firefox, Thunderbird msf (Mail Summary File) */
  TXT_HEADER("// <!-- <mdb:mork:z", "msf"),
  /* MySQL, phpMyAdmin, PostgreSQL dumps */
  TXT_HEADER("-- MySQL dump ", "sql"),
  TXT_HEADER("-- phpMyAdmin SQL Dump", "sql"),
  TXT_HEADER("--\n-- PostgreSQL database cluster dump", "sql"),
  TXT_HEADER("--\r\n-- PostgreSQL database cluster dump", "sql"),
  TXT_HEADER("---- BEGIN SSH2 PUBLIC KEY ----", "ppk"),
  TXT_HEADER("PuTTY-User-Key-File-2:", "ppk"),
  TXT_HEADER("-----BEGIN PGP PRIVATE KEY BLOCK-----", "priv"),
  TXT_HEADER("-----BEGIN PGP PUBLIC KEY BLOCK-----", "pub"),
  /* PTGui, panoramic stitching software */
  TXT_HEADER("# ptGui project file", "pts"),
  TXT_HEADER("ssh-dss AAAAB3", "pub"),
  TXT_HEADER("ssh-rsa AAAAB3", "pub"),
  /* Quantum GIS */
  TXT_HEADER("<!DOCTYPE qgis ", "qgs"),
  /* Real Media */
  TXT_HEADER("rtsp://", "ram"),
  /* Windows registry config file */
  TXT_HEADER("REGEDIT4", "reg"),
  /* Reaper Project */
  TXT_HEADER("<REAPER_PROJECT ", "rpp"),
  /* Olfaction SeeNez subtitle */
  TXT_HEADER("#SeeNez ", "SeeNezSST"),
  /* Sylk, Multiplan Symbolic Link Interchange */
  TXT_HEADER("ID;PSCALC3", "slk"),
  /* ISO 10303 is an ISO standard for the computer-interpretable
   * representation and exchange of industrial product data.
   * - Industrial automation systems and integration - Product data representation and exchange
   * - Standard for the Exchange of Product model data. */
  TXT_HEADER("ISO-10303-21;", "stp"),
  /* URL / Internet Shortcut */
  TXT_HEADER("[InternetShortcut]", "url"),
  /* Veeam Backup Metadata */
  TXT_HEADER("<BackupMeta Id=\"", "vbm"),
  /* Windows Play List */
  TXT_HEADER("<?wpl version=\"1.0\"?>", "wpl"),
  /* Windows URL / Internet Shortcut */
  TXT_HEADER("BEGIN:VBKM", "url"),
  /* Firefox session store */
  TXT_HEADER("({\"windows\":[{\"tabs\":[{\"entries\":[{\"url\":\"", TXT_EXT_SESSIONSTORE),
  /* Matlab model .mdl */
  TXT_HEADER("Model {", "mdl"),
  /* Windows folder settings for file explorer */
  TXT_HEADER("[.ShellClassInfo]", "Desktop.ini"),
  /* Fotobook */
  TXT_HEADER("<fotobook ", "mcf"),
  {NULL, 0, NULL}
};

#undef TXT_EXT_SESSIONSTORE
#undef TXT_EXT_EMKA
#undef TXT_HEADER
219
220
221 // #define DEBUG_FILETXT
222
/* Tell whether a byte value can be found in a text file.
 * Returns 1 when car is accepted, 0 otherwise. */
static int filtre(unsigned int car)
{
  /* Printable ASCII (this range already contains 0x7c) */
  if(car >= ' ' && car <= '~')
    return 1;
  /* Backspace, tab, line feed, carriage return */
  if(car=='\b' || car=='\t' || car=='\n' || car=='\r')
    return 1;
  /* Two contiguous ranges of accepted high bytes */
  if((car >= 0x82 && car <= 0x8d) || (car >= 0x93 && car <= 0x98))
    return 1;
  /* Individually accepted high bytes */
  switch(car)
  {
    case 0x80: /* '€' */
    case 0x92: /* '’' */
    case 0x99: /* '™' */
    case 0x9c: /* 'œ' */
    case 0xa0: /* nonbreaking space */
    case 0xa1: /* '¡' */
    case 0xa2: /* '¢' */
    case 0xa3: /* '£' */
    case 0xa7: /* '§' */
    case 0xa8: /* '¨' */
    case 0xa9: /* '©' */
    case 0xab: /* '«' */
    case 0xae: /* '®' */
    case 0xb0: /* '°' */
    case 0xb4: /* '´' */
    case 0xb7: /* '·' */
    case 0xbb: /* '»' */
    case 0xc0: /* 'À' */
    case 0xc7: /* 'Ç' */
    case 0xc9: /* 'É' */
    case 0xd6: /* 'Ö' */
    case 0xd7: /* '×' */
    case 0xd9: /* 'Ù' */
    case 0xdf: /* 'ß' */
    case 0xe0: /* 'à' */
    case 0xe1: /* 'á' */
    case 0xe2: /* 'â' */
    case 0xe3: /* 'ã' */
    case 0xe4: /* 'ä' */
    case 0xe6: /* 'æ' */
    case 0xe7: /* 'ç' */
    case 0xe8: /* 'è' */
    case 0xe9: /* 'é' */
    case 0xea: /* 'ê' */
    case 0xeb: /* 'ë' */
    case 0xed: /* 'í' */
    case 0xee: /* 'î' */
    case 0xef: /* 'ï' */
    case 0xf4: /* 'ô' */
    case 0xf6: /* 'ö' */
    case 0xf8: /* 'ø' */
    case 0xf9: /* 'ù' */
    case 0xfa: /* 'ú' */
    case 0xfb: /* 'û' */
    case 0xfc: /* 'ü' */
      return 1;
    default:
      return 0;
  }
}
283
/* Convert the start of buffer into a NUL-terminated single-byte string.
 *
 * Recognised 2- and 3-byte UTF-8 sequences are mapped to one byte each;
 * any other byte is copied through tolower(). Conversion stops at the first
 * NUL, at the first sequence with no mapping, or at the first resulting byte
 * rejected by filtre().
 *
 * buffer_lower: destination; it must have room for buf_len bytes plus the
 *               NUL terminator.
 * buffer:       source bytes.
 * buf_len:      number of readable bytes in buffer.
 * Returns the number of source bytes consumed.
 *
 * Multi-byte sequences are only decoded when all of their bytes lie inside
 * buf_len, so the function never reads past the source buffer and never
 * returns more than buf_len. */
int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, const int buf_len)
{
  const unsigned char *p; 	/* pointers to actual position in source buffer */
  unsigned char *q;	/* pointers to actual position in destination buffer */
  int i; /* counter of remaining bytes available in destination buffer */
  for (i = buf_len, p = buffer, q = buffer_lower; p-buffer<buf_len && i > 0 && *p!='\0';)
  {
    const unsigned char *p_org=p;
    /* Source bytes still readable starting at p */
    const int remaining=buf_len-(int)(p-buffer);
    if(remaining>=3 && (*p & 0xf0)==0xe0 && (*(p+1) & 0xc0)==0x80 && (*(p+2) & 0xc0)==0x80)
    { /* UTF8 l=3 */
#ifdef DEBUG_TXT
      log_info("UTF8 l=3 0x%02x 0x%02x 0x%02x\n", *p, *(p+1),*(p+2));
#endif
      /* Stays NUL (and is rejected below) when the sequence has no mapping */
      *q = '\0';
      switch (*p)
      {
	case 0xE2 :
	  switch (*(p+1))
	  {
	    case 0x80 :
	      switch (*(p+2))
	      {
		case 0x93 : (*q) = 150; break;
		case 0x94 : (*q) = 151; break;
		case 0x98 : (*q) = 145; break;
		/* case 0x99 : (*q) = 146; break; */
		case 0x99 : (*q) = '\''; break;
		case 0x9A : (*q) = 130; break;
		case 0x9C : (*q) = 147; break;
		case 0x9D : (*q) = 148; break;
		case 0x9E : (*q) = 132; break;
		case 0xA0 : (*q) = 134; break;
		case 0xA1 : (*q) = 135; break;
		case 0xA2 : (*q) = 149; break;
		case 0xA6 : (*q) = 133; break;
		case 0xB0 : (*q) = 137; break;
		case 0xB9 : (*q) = 139; break;
		case 0xBA : (*q) = 155; break;
	      }
	      break;
	    case 0x82 :
	      switch (*(p+2))
	      {
		case 0xAC : (*q) = 128; break;
	      }
	      break;
	    case 0x84 :
	      switch (*(p+2))
	      {
		case 0xA2 : (*q) = 153; break;
	      }
	      break;
	  }
	  break;
      }
      p+=3;
    }
    else if(remaining>=2 && (*p & 0xe0)==0xc0 && (*(p+1) & 0xc0)==0x80)
    { /* UTF8 l=2 */
      /* Stays NUL (and is rejected below) when the sequence has no mapping */
      *q = '\0';
      switch (*p)
      {
	case 0xC2 :
	  (*q) = ((*(p+1)) | 0x80) & 0xBF; /* A0-BF and a few 80-9F */
	  if((*q)==0xA0)
	    (*q)=' ';
	  break;
	case 0xC3 :
	  switch (*(p+1))
	  {
	    case 0xB3 : (*q) = 162; break;
	    default:
			(*q) = (*(p+1)) | 0xC0; /* C0-FF */
			break;
	  }
	  break;
	case 0xC5 :
	  switch (*(p+1)) {
	    case 0x92 : (*q) = 140; break;
	    case 0x93 : (*q) = 156; break;
	    case 0xA0 : (*q) = 138; break;
	    case 0xA1 : (*q) = 154; break;
	    case 0xB8 : (*q) = 143; break;
	    case 0xBD : (*q) = 142; break;
	    case 0xBE : (*q) = 158; break;
	  }
	  break;
	case 0xC6:
	  switch (*(p+1)) {
	    case 0x92 : (*q) = 131; break;
	  }
	  break;
	case 0xCB :
	  switch (*(p+1)) {
	    case 0x86 : (*q) = 136; break;
	    case 0x9C : (*q) = 152; break;
	  }
	  break;
      }
      p+=2;
    }
    else
    { /* Ascii UCS */
#ifdef DEBUG_TXT
      log_info("UTF8 Ascii UCS 0x%02x\n", *p);
#endif
      *q = tolower(*p);
      p++;
    }
    if (*q=='\0' || filtre(*q)==0)
    {
#ifdef DEBUG_TXT
      log_warning("UTF2Lat reject 0x%x\n",*q);
#endif
      /* Terminate the output and report only the bytes before the rejected one */
      *q = '\0';
      return(p_org-buffer);
    }
    q++;
    i--;
  }
  *q = '\0';
  return(p-buffer);
}
409
/* Measure how many leading bytes of buffer look like text.
 *
 * Scanning stops at a NUL byte, at a byte that is rejected as an invalid
 * UTF-8 lead (0xc0, 0xc1, 0xf7, 0xfd and above) or at a rejected control
 * character. A 2- or 3-byte UTF-8 sequence cut by the end of the buffer is
 * accepted.
 * Returns the number of accepted bytes, never more than buf_len. */
static int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
{
  unsigned int pos=0;	/* offset of the byte being examined */
  while(pos<buf_len && buffer[pos]!='\0')
  {
    const unsigned char c=buffer[pos];
    /* Reject some invalid UTF-8 sequences */
    if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
      return pos;
    if((c & 0xf0)==0xe0 &&
	(pos+2 >= buf_len || ((buffer[pos+1] & 0xc0)==0x80 && (buffer[pos+2] & 0xc0)==0x80)))
    { /* UTF8 l=3 */
#ifdef DEBUG_TXT
      log_info("UTFsize i=%u l=3\n", pos);
#endif
      pos+=3;
      continue;
    }
    if((c & 0xe0)==0xc0 && (pos+1 >= buf_len || (buffer[pos+1] & 0xc0)==0x80))
    { /* UTF8 l=2 */
#ifdef DEBUG_TXT
      log_info("UTFsize i=%u l=2\n", pos);
#endif
      pos+=2;
      continue;
    }
    /* Ascii UCS */
#ifdef DEBUG_TXT
    log_info("UTFsize i=%u l=1 ? *p=%c\n", pos, c);
#endif
    /* Control characters other than 0x08-0x0a and 0x0d-0x0f end the text */
    if(c<=0x07 || c==0x0b || c==0x0c || (c>=0x10 && c<=0x1f) || c==0x7f)
      return pos;
    pos++;
  }
  /* A truncated trailing sequence may have moved pos past the end */
  return (pos<buf_len?pos:buf_len);
}
477
data_check_html(const unsigned char * buffer,const unsigned int buffer_size,file_recovery_t * file_recovery)478 static data_check_t data_check_html(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
479 {
480 const char sign_html_end[] = "</html>";
481 const unsigned int i=UTFsize(&buffer[buffer_size/2], buffer_size/2);
482 unsigned int j;
483 for(j=(buffer_size/2>sizeof(sign_html_end)?buffer_size/2-sizeof(sign_html_end):0);
484 j+sizeof(sign_html_end)-1 < buffer_size;
485 j++)
486 {
487 if(buffer[j]=='<' && strncasecmp((const char *)&buffer[j], sign_html_end, sizeof(sign_html_end)-1)==0)
488 {
489 file_recovery->calculated_file_size+=j-buffer_size/2+sizeof(sign_html_end)-1;
490 return DC_STOP;
491 }
492 }
493 if(i<buffer_size/2)
494 {
495 if(i>=10)
496 file_recovery->calculated_file_size=file_recovery->file_size+i;
497 return DC_STOP;
498 }
499 file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
500 return DC_CONTINUE;
501 }
502
data_check_txt(const unsigned char * buffer,const unsigned int buffer_size,file_recovery_t * file_recovery)503 static data_check_t data_check_txt(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
504 {
505 const unsigned int i=UTFsize(&buffer[buffer_size/2], buffer_size/2);
506 if(i<buffer_size/2)
507 {
508 if(i>=10)
509 file_recovery->calculated_file_size=file_recovery->file_size+i;
510 return DC_STOP;
511 }
512 file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
513 return DC_CONTINUE;
514 }
515
data_check_ttd(const unsigned char * buffer,const unsigned int buffer_size,file_recovery_t * file_recovery)516 static data_check_t data_check_ttd(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
517 {
518 unsigned int i;
519 for(i=buffer_size/2; i<buffer_size; i++)
520 {
521 const unsigned char car=buffer[i];
522 if((car>='A' && car<='F') || (car >='0' && car <='9') || car==' ' || car=='\n')
523 continue;
524 file_recovery->calculated_file_size=file_recovery->file_size + i - buffer_size/2;
525 return DC_STOP;
526 }
527 file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
528 return DC_CONTINUE;
529 }
530
/* Header check for ttd files: the byte at offset 56 must be a decimal digit.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_ttd(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const unsigned char digit=buffer[56];
  if(!(digit>='0' && digit<='9'))
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="ttd";
  file_recovery_new->data_check=&data_check_ttd;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
541
/* File check for ER Mapper files: search for the "DatasetHeader End" footer,
 * then allow a trailing newline (LF, CRLF or CR). */
static void file_check_ers(file_recovery_t *file_recovery)
{
  static const char ers_footer[]="DatasetHeader End";
  file_search_footer(file_recovery, ers_footer, sizeof(ers_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
547
/* Header check for ER Mapper Rasters (ERS): always accepts.
 * Returns 1 after filling file_recovery_new. */
static int header_check_ers(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="ers";
  file_recovery_new->file_check=&file_check_ers;
  file_recovery_new->data_check=&data_check_txt;
  return 1;
}
557
/* Header check for vCalendar (ics) files.
 * Rejects the header when the byte at offset 15 is NUL. When a "DTSTART"
 * property followed by ':' and enough characters is present in buffer, the
 * recovered file's time is taken from it.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_ics(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  char *text;
  const char *dtstart;
  const char *colon=NULL;
  if(buffer[15]=='\0')
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="ics";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  /* Work on a NUL-terminated copy so the string functions can be used */
  text=(char *)MALLOC(buffer_size+1);
  memcpy(text, buffer, buffer_size);
  text[buffer_size]='\0';
  /* DTSTART:19970714T133000            ;Local time
   * DTSTART:19970714T173000Z           ;UTC time
   * DTSTART;TZID=US-Eastern:19970714T133000    ;Local time and time
   */
  dtstart=strstr(text, "DTSTART");
  if(dtstart!=NULL)
    colon=strchr(dtstart, ':');
  if(colon!=NULL && colon+1+14 < text+buffer_size)
    file_recovery_new->time=get_time_from_YYYYMMDD_HHMMSS(colon+1);
  free(text);
  return 1;
}
586
/* Header check for Perl modules and Java sources.
 * The first statement must end with ';' before any line feed: the scan
 * covers offsets 0..127 and then tests the byte it stopped on.
 * The file is named java when one of a few Java keywords appears in the
 * first 2048 bytes (at most), pm otherwise.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_perlm(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int search_len=2048;
  unsigned int pos=0;
  int looks_like_java;
  while(pos<128 && buffer[pos]!=';' && buffer[pos]!='\n')
    pos++;
  if(buffer[pos]!=';')
    return 0;
  if(buffer_size < search_len)
    search_len=buffer_size;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  looks_like_java=(td_memmem(buffer, search_len, "class", 5)!=NULL ||
      td_memmem(buffer, search_len, "private static", 14)!=NULL ||
      td_memmem(buffer, search_len, "public interface", 16)!=NULL);
  if(!looks_like_java)
  {
    /* perl module */
    file_recovery_new->extension="pm";
    return 1;
  }
  /* source code in java */
#ifdef DJGPP
  file_recovery_new->extension="jav";
#else
  file_recovery_new->extension="java";
#endif
  return 1;
}
615
/* Header check for TSCe Survey Controller DC v10.0 files: the first two
 * bytes must be "00".
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_dc(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  if(buffer[0]!='0' || buffer[1]!='0')
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="dc";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
630
/* Rename a recovered flat OpenDocument file after its <dc:title>.
 *
 * Reads up to 4095 bytes from the start of the recovered file and looks
 * (case-insensitively) for "<office:meta><dc:title>"; the text up to the next
 * '<' is passed to file_rename(). Nothing happens when the file cannot be
 * read or when no title is found.
 *
 * The stream is closed before file_rename() is called, so the file is not
 * renamed while this function still holds it open. */
static void file_rename_fods(file_recovery_t *file_recovery)
{
  static const char title_tag[]="<office:meta><dc:title>";
  FILE *file;
  char buffer[4096];
  char *tmp;
  size_t lu;
  if((file=fopen(file_recovery->filename, "rb"))==NULL)
    return;
  lu=fread(buffer, 1, sizeof(buffer)-1, file);
  /* Only the bytes already read are needed from here on */
  fclose(file);
  if(lu==0)
    return;
  buffer[lu]='\0';
  for(tmp=strchr(buffer,'<'); tmp!=NULL; tmp=strchr(tmp+1,'<'))
  {
    if(strncasecmp(tmp, title_tag, sizeof(title_tag)-1)==0)
    {
      const char *title=tmp+sizeof(title_tag)-1;
      /* Cut the title at the start of the next tag, if any */
      tmp=strchr(title,'<');
      if(tmp!=NULL)
	*tmp='\0';
      file_rename(file_recovery, (const unsigned char*)title, strlen(title), 0, NULL, 1);
      return;
    }
  }
}
663
/* Rename a recovered HTML file after its <title>.
 *
 * Reads up to 4095 bytes from the start of the recovered file and scans the
 * tags (case-insensitively): the search is abandoned at "</head", and the
 * text following "<title>" up to the next '<' is passed to file_rename().
 * Nothing happens when the file cannot be read or when no title is found.
 *
 * The stream is closed before file_rename() is called, so the file is not
 * renamed while this function still holds it open. */
static void file_rename_html(file_recovery_t *file_recovery)
{
  static const char head_end[]="</head";
  static const char title_tag[]="<title>";
  FILE *file;
  char buffer[4096];
  char *tmp;
  size_t lu;
  if((file=fopen(file_recovery->filename, "rb"))==NULL)
    return;
  lu=fread(buffer, 1, sizeof(buffer)-1, file);
  /* Only the bytes already read are needed from here on */
  fclose(file);
  if(lu==0)
    return;
  buffer[lu]='\0';
  for(tmp=strchr(buffer,'<'); tmp!=NULL; tmp=strchr(tmp+1,'<'))
  {
    /* No title can follow the end of the head section */
    if(strncasecmp(tmp, head_end, sizeof(head_end)-1)==0)
      return;
    if(strncasecmp(tmp, title_tag, sizeof(title_tag)-1)==0)
    {
      const char *title=tmp+sizeof(title_tag)-1;
      /* Cut the title at the start of the next tag, if any */
      tmp=strchr(title,'<');
      if(tmp!=NULL)
	*tmp='\0';
      file_rename(file_recovery, (const unsigned char*)title, strlen(title), 0, NULL, 1);
      return;
    }
  }
}
701
/* Header check for Hypertext Markup Language (HTML).
 * Rejected while an mbox file is being recovered by the fasttxt hint, or when
 * the byte at offset 14 is NUL.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_html(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const int inside_mbox=(file_recovery->file_stat!=NULL &&
      file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
      strcmp(file_recovery->extension,"mbox")==0);
  if(inside_mbox || buffer[14]==0)
    return 0;
  reset_file_recovery(file_recovery_new);
#ifdef DJGPP
  file_recovery_new->extension="htm";
#else
  file_recovery_new->extension="html";
#endif
  file_recovery_new->data_check=&data_check_html;
  file_recovery_new->file_check=&file_check_size;
  file_recovery_new->file_rename=&file_rename_html;
  return 1;
}
722
/* File check for Veeam Backup Metadata: search for the "</BackupMeta>"
 * footer, then allow a trailing newline (LF, CRLF or CR). */
static void file_check_vbm(file_recovery_t *file_recovery)
{
  static const char vbm_footer[]="</BackupMeta>";
  file_search_footer(file_recovery, vbm_footer, sizeof(vbm_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
728
/* Header check for Veeam Backup Metadata (vbm): always accepts.
 * Returns 1 after filling file_recovery_new. */
static int header_check_vbm(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="vbm";
  file_recovery_new->file_check=&file_check_vbm;
  file_recovery_new->data_check=&data_check_txt;
  return 1;
}
737
/* File check for GPX: search for the "</gpx>" footer, then allow a trailing
 * newline (LF, CRLF or CR). */
static void file_check_gpx(file_recovery_t *file_recovery)
{
  static const char gpx_footer[]="</gpx>";
  file_search_footer(file_recovery, gpx_footer, sizeof(gpx_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
743
/* File check for generic XML: search for a ">" footer, then allow a trailing
 * newline (LF, CRLF or CR). */
static void file_check_xml(file_recovery_t *file_recovery)
{
  static const char xml_footer[]=">";
  file_search_footer(file_recovery, xml_footer, sizeof(xml_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
749
/* File check for SVG: search for the "</svg>" footer, then allow a trailing
 * newline (LF, CRLF or CR). */
static void file_check_svg(file_recovery_t *file_recovery)
{
  static const char svg_footer[]="</svg>";
  file_search_footer(file_recovery, svg_footer, sizeof(svg_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
755
data_check_xml_utf8(const unsigned char * buffer,const unsigned int buffer_size,file_recovery_t * file_recovery)756 static data_check_t data_check_xml_utf8(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
757 {
758 unsigned int i;
759 if(buffer_size<=8)
760 return DC_CONTINUE;
761 i=UTFsize(&buffer[buffer_size/2+4], buffer_size/2-4)+4;
762 if(i<buffer_size/2)
763 {
764 file_recovery->calculated_file_size=file_recovery->file_size+i;
765 return DC_STOP;
766 }
767 file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
768 file_recovery->data_check=&data_check_txt;
769 return DC_CONTINUE;
770 }
771
/* Header check for XML checked with data_check_xml_utf8().
 * The tags found in buffer select the extension: ghx for a Grasshopper
 * archive, xml otherwise.
 * Returns 1 after filling file_recovery_new. */
static int header_check_xml_utf8(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const char *tag;
  /* buffer may not be null-terminated: scan a terminated copy */
  char *buf=(char *)MALLOC(buffer_size+1);
  memcpy(buf, buffer, buffer_size);
  buf[buffer_size]='\0';
  reset_file_recovery(file_recovery_new);
  file_recovery_new->data_check=&data_check_xml_utf8;
  file_recovery_new->extension=NULL;
  for(tag=strchr(buf,'<');
      tag!=NULL && file_recovery_new->extension==NULL;
      tag=strchr(tag+1,'<'))
  {
    /* Grasshopper archive.
     * NOTE(review): only the first 8 characters ("<Archive") are compared - confirm this is intended */
    if(strncasecmp(tag, "<Archive name=\"Root\">", 8)==0)
      file_recovery_new->extension="ghx";
  }
  if(file_recovery_new->extension==NULL)
    file_recovery_new->extension="xml";
  file_recovery_new->file_check=&file_check_xml;
  free(buf);
  return 1;
}
801
/* Header check for UTF-16 XML.
 * Rejected while a file of the doc hint is being recovered, to avoid a false
 * positive with .sldprt.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_xml_utf16(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint==&file_hint_doc)
      return 0;
  }
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="xml";
  return 1;
}
812
/* Header check for XML documents.
 * The tags found in buffer are examined in order (case-insensitively) until
 * one of them identifies a more specific format; the extension, and for some
 * formats the data check, file check or rename callback, are set accordingly.
 * Documents that match nothing are recovered as xml.
 * Returns 1 after filling file_recovery_new. */
static int header_check_xml(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const char *tag;
  /* buffer may not be null-terminated: scan a terminated copy */
  char *buf=(char *)MALLOC(buffer_size+1);
  memcpy(buf, buffer, buffer_size);
  buf[buffer_size]='\0';
  reset_file_recovery(file_recovery_new);
  file_recovery_new->data_check=&data_check_txt;
  /* Default file check; svg and gpx install their own below */
  file_recovery_new->file_check=&file_check_xml;
  file_recovery_new->extension=NULL;
  for(tag=strchr(buf,'<');
      tag!=NULL && file_recovery_new->extension==NULL;
      tag=strchr(tag+1,'<'))
  {
    if(strncasecmp(tag, "<Grisbi>", 8)==0)
    {
      /* Grisbi - Personal Finance Manager XML data */
      file_recovery_new->extension="gsb";
    }
    else if(strncasecmp(tag, "<collection type=\"GC", 20)==0)
    {
      /* GCstar, personal collections manager, http://www.gcstar.org/ */
      file_recovery_new->extension="gcs";
    }
    else if(strncasecmp(tag, "<html", 5)==0)
    {
      file_recovery_new->data_check=&data_check_html;
      file_recovery_new->file_rename=&file_rename_html;
#ifdef DJGPP
      file_recovery_new->extension="htm";
#else
      file_recovery_new->extension="html";
#endif
    }
    else if(strncasecmp(tag, "<Version>QBFSD", 14)==0)
    {
      /* QuickBook */
      file_recovery_new->extension="fst";
    }
    else if(strncasecmp(tag, "<svg", 4)==0)
    {
      /* Scalable Vector Graphics */
      file_recovery_new->file_check=&file_check_svg;
      file_recovery_new->extension="svg";
    }
    else if(strncasecmp(tag, "<!DOCTYPE plist ", 16)==0)
    {
      /* Mac OS X property list */
#ifdef DJGPP
      file_recovery_new->extension="pli";
#else
      file_recovery_new->extension="plist";
#endif
    }
    else if(strncasecmp(tag, "<gpx ", 5)==0)
    {
      /* GPS eXchange Format */
      file_recovery_new->file_check=&file_check_gpx;
      file_recovery_new->extension="gpx";
    }
    else if(strncasecmp(tag, "<PremiereData Version=", 22)==0)
    {
      /* Adobe Premiere project */
      file_recovery_new->data_check=NULL;
      file_recovery_new->extension="prproj";
    }
    else if(strncasecmp(tag, "<SCRIBUS", 8)==0)
    {
      /* Scribus XML file */
      file_recovery_new->extension="sla";
    }
    else if(strncasecmp(tag, "<FictionBook", 12)==0)
    {
      /* FictionBook, see http://www.fictionbook.org */
      file_recovery_new->extension="fb2";
    }
    else if(strncasecmp(tag, "<office:document", 16)==0)
    {
      /* OpenDocument Flat XML Spreadsheet */
      file_recovery_new->data_check=NULL;
      file_recovery_new->file_rename=&file_rename_fods;
      file_recovery_new->extension="fods";
    }
  }
  if(file_recovery_new->extension==NULL)
  {
    /* XML Extensible Markup Language */
    file_recovery_new->extension="xml";
  }
  free(buf);
  return 1;
}
911
/* Header check for Rich Text Format.
 * Rejected when one of the first 16 bytes is NUL, or while a file of the doc
 * hint is being recovered (avoids a false positive with .snt).
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_rtf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  if(memchr(buffer, '\0', 16)!=NULL)
    return 0;
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint==&file_hint_doc)
      return 0;
  }
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="rtf";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
929
/* Header check for Adobe's Extensible Metadata Platform (xmp).
 * Rejected when the byte at offset 35 is NUL, or while a jpg, pdf or tiff
 * file is being recovered.
 * Returns 1 and fills file_recovery_new on success, 0 otherwise. */
static int header_check_xmp(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  if(buffer[35]=='\0')
    return 0;
  if(file_recovery->file_stat!=NULL)
  {
    if(file_recovery->file_stat->file_hint==&file_hint_jpg)
      return 0;
    if(file_recovery->file_stat->file_hint==&file_hint_pdf)
      return 0;
    if(file_recovery->file_stat->file_hint==&file_hint_tiff)
      return 0;
  }
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="xmp";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
946
/*
 * Final size check for mailboxes accepted by header_check_thunderbird.
 *
 * If fewer bytes than calculated_file_size were recovered the file is
 * discarded (file_size set to 0); otherwise it is cut to calculated_file_size.
 */
static void file_check_thunderbird(file_recovery_t *file_recovery)
{
  file_recovery->file_size=
    (file_recovery->file_size<file_recovery->calculated_file_size ?
     0 : file_recovery->calculated_file_size);
}
956
/*
 * Header check for "From - " mailbox headers.
 *
 * Nothing is started when an "mbox" file found through file_hint_fasttxt is
 * already being recovered, or when one of the first 64 bytes is NUL.
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_thunderbird(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  /* True when the current file is already a mailbox from this hint */
  const int inside_mbox=(file_recovery->file_stat!=NULL &&
      file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
      strcmp(file_recovery->extension,"mbox")==0);
  if(inside_mbox)
    return 0;
  /* The first 64 bytes must not contain any NUL byte */
  if(memchr(buffer, 0, 64)!=NULL)
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="mbox";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_thunderbird;
  return 1;
}
973
/*
 * Header check for mailbox files recognised by a mail header keyword.
 *
 * Nothing is started when an "mbox" file found through file_hint_fasttxt is
 * already being recovered, or when one of the first 64 bytes is NUL.
 * For a "From " line that is not "From MAILER-DAEMON ", the word following
 * "From " must contain a '@' before the next space (within the first 200
 * bytes).
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_mbox(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  /* True when the current file is already a mailbox from this hint */
  const int inside_mbox=(file_recovery->file_stat!=NULL &&
      file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
      strcmp(file_recovery->extension,"mbox")==0);
  if(inside_mbox)
    return 0;
  /* The first 64 bytes must not contain any NUL byte */
  if(memchr(buffer, 0, 64)!=NULL)
    return 0;
  if(memcmp(buffer, "From ", 5)==0 &&
      memcmp(buffer, "From MAILER-DAEMON ", 19)!=0)
  {
    /* From someone@somewhere */
    unsigned int pos=5;
    while(pos<200 && buffer[pos]!=' ' && buffer[pos]!='@')
      pos++;
    if(buffer[pos]!='@')
      return 0;
  }
  reset_file_recovery(file_recovery_new);
  /* Incredimail has .imm extension but this extension isn't frequent */
  file_recovery_new->extension="mbox";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
999
/*
 * Header check shared by every entry of the fasttxt_headers table.
 *
 * The table is walked until its terminating entry (len == 0). The first entry
 * whose string matches the start of buffer decides the outcome: the candidate
 * is refused if the byte right after the signature is NUL, otherwise
 * file_recovery_new is set up with the extension of that entry and a minimum
 * file size of signature length + 1.
 *
 * Returns 1 when a file is started, 0 otherwise.
 */
static int header_check_fasttxt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const txt_header_t *entry;
  for(entry=&fasttxt_headers[0]; entry->len > 0; entry++)
  {
    if(memcmp(buffer, entry->string, entry->len)!=0)
      continue;
    /* The signature must be followed by some data */
    if(buffer[entry->len]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->extension=entry->extension;
    file_recovery_new->min_filesize=entry->len+1;
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    return 1;
  }
  return 0;
}
1020
/*
 * Tell whether a NUL-terminated string starts with an ini-style section
 * header, i.e. '[' followed by alphanumeric characters or spaces and a
 * closing ']'.
 *
 * buffer: NUL-terminated text to inspect.
 *
 * Returns 1 when the closing ']' is found more than 3 characters after the
 * start of buffer (at least three characters between the brackets),
 * 0 otherwise. Any other character, including the terminating NUL, stops the
 * scan with a result of 0.
 */
static int is_ini(const char *buffer)
{
  const char *src=buffer;
  if(*src!='[')
    return 0;
  for(src++; *src!=']'; src++)
  {
    /* <ctype.h> functions require a value representable as unsigned char:
     * passing a negative plain char is undefined behaviour */
    const unsigned char c=(unsigned char)*src;
    if(!isalnum(c) && c!=' ')
      return 0;
  }
  return (src > buffer + 3) ? 1 : 0;
}
1040
1041 #ifdef UTF16
/*
 * Header check for little-endian UTF-16 text.
 *
 * The buffer is scanned two bytes at a time. A pair is accepted when its
 * second byte is NUL and its first byte is printable, '\n', '\r' or 0xbb.
 * If the scan stops on a rejected pair before offset 40 the candidate is
 * refused; otherwise the offset reached becomes calculated_file_size.
 *
 * Returns 1 when a "utf16" file is started, 0 otherwise.
 */
static int header_check_le16_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int pos;
  int rejected_pair=0;
  for(pos=0; pos+1 < buffer_size; pos+=2)
  {
    const unsigned char low=buffer[pos];
    const int accepted=(buffer[pos+1]=='\0' &&
	(isprint(low) || low=='\n' || low=='\r' || low==0xbb));
    if(!accepted)
    {
      rejected_pair=1;
      break;
    }
  }
  /* Too short a run of text before the first rejected pair */
  if(rejected_pair!=0 && pos<40)
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="utf16";
  file_recovery_new->calculated_file_size=pos;
  file_recovery_new->data_check=&data_check_size;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
1066 #endif
1067
/*
 * Final size check for Mac OSX mail (.emlx) files.
 *
 * The file is discarded when it is shorter than calculated_file_size.
 * Otherwise it is first limited to calculated_file_size + 2048 bytes and then
 * the "</plist>\n" footer is searched with file_search_footer().
 */
static void file_check_emlx(file_recovery_t *file_recovery)
{
  if(file_recovery->file_size < file_recovery->calculated_file_size)
  {
    file_recovery->file_size=0;
    return;
  }
  if(file_recovery->file_size > file_recovery->calculated_file_size+2048)
  {
    file_recovery->file_size=file_recovery->calculated_file_size+2048;
  }
  file_search_footer(file_recovery, "</plist>\n", 9, 0);
}
1079
/*
 * Generic text detector: tries to recognise a text file from its first bytes.
 *
 * The checks run in this order, the first match wins:
 *  1. Mac OSX mail (.emlx): decimal digits, a line feed, then "Return-Path: "
 *     or "Received: from" (skipped while an "mbox" file found through
 *     file_hint_fasttxt is being recovered).
 *  2. Case-insensitive fixed prefixes: "@echo off" (bat), ASP vbscript (asp),
 *     Visual Basic (vb), vcard (vcf).
 *  3. "#!" line mentioning perl, python or ruby.
 *  4. Nothing else is tried when safe_header_only is set, or when a file is
 *     being recovered unless it comes from file_hint_fasttxt/file_hint_txt
 *     and its filename contains ".html".
 *  5. The start of the buffer (at most 2048 bytes) is converted by UTF2Lat()
 *     into the cached buffer_lower; the result must be at least 10 bytes long
 *     and the first 512 bytes must hold a '\n'.
 *  6. "rem " (bat) and "dn: " (ldif) prefixes.
 *  7. Keyword and statistic based heuristics choosing the extension.
 *
 * Returns 1 and fills file_recovery_new when a text file is recognised,
 * 0 otherwise.
 */
static int header_check_txt(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  /* Conversion buffer kept between calls, see the allocation below */
  static char *buffer_lower=NULL;
  static unsigned int buffer_lower_size=0;
  /* Length returned by UTF2Lat() */
  unsigned int l;
  /* At most the first 2048 bytes of the buffer are converted */
  const unsigned int buffer_size_test=(buffer_size < 2048 ? buffer_size : 2048);
  {
    unsigned int i;
    /* Value of the leading decimal number (up to 10 digits) */
    unsigned int tmp=0;
    for(i=0;i<10 && isdigit(buffer[i]);i++)
      tmp=tmp*10+buffer[i]-'0';
    if(buffer[i]==0x0a &&
      (memcmp(buffer+i+1, "Return-Path: ", 13)==0 ||
       memcmp(buffer+i+1, "Received: from", 14)==0) &&
      !(file_recovery->file_stat!=NULL &&
        file_recovery->file_stat->file_hint==&file_hint_fasttxt &&
        strcmp(file_recovery->extension,"mbox")==0))
    {
      reset_file_recovery(file_recovery_new);
      /* Leading number + the digits themselves + the line feed */
      file_recovery_new->calculated_file_size=tmp+i+1;
      file_recovery_new->data_check=NULL;
      file_recovery_new->file_check=&file_check_emlx;
      /* Mac OSX mail */
      file_recovery_new->extension="emlx";
      return 1;
    }
  }
  if(strncasecmp((const char *)buffer, "@echo off", 9)==0)
  {
    /* The prefix must be followed by some data */
    if(buffer[9]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Dos/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "<%@ language=\"vbscript", 22)==0)
  {
    if(buffer[22]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Active Server Pages */
    file_recovery_new->extension="asp";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "version 4.00\r\nbegin", 19)==0)
  {
    if(buffer[19]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Microsoft Visual Basic */
    file_recovery_new->extension="vb";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "begin:vcard", 11)==0)
  {
    if(buffer[11]=='\0')
      return 0;
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* vcard, electronic business cards */
    file_recovery_new->extension="vcf";
    return 1;
  }
  if(buffer[0]=='#' && buffer[1]=='!')
  {
    /* Search the interpreter name in the first line only, limited to the
     * 510 bytes following "#!" */
    unsigned int ll=512-2;
    const unsigned char *haystack=(const unsigned char *)buffer+2;
    const unsigned char *res;
    res=(const unsigned char *)memchr(haystack,'\n',ll);
    if(res!=NULL)
      ll=res-haystack;
    if(td_memmem(haystack, ll, "perl", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Perl script */
      file_recovery_new->extension="pl";
      return 1;
    }
    if(td_memmem(haystack, ll, "python", 6) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Python script */
      file_recovery_new->extension="py";
      return 1;
    }
    if(td_memmem(haystack, ll, "ruby", 4) != NULL)
    {
      reset_file_recovery(file_recovery_new);
      file_recovery_new->data_check=&data_check_txt;
      file_recovery_new->file_check=&file_check_size;
      /* Ruby script */
      file_recovery_new->extension="rb";
      return 1;
    }
    /* Unknown interpreter: fall through to the generic checks */
  }
  /* Everything below is heuristic */
  if(safe_header_only!=0)
  {
    return 0;
  }
  if(file_recovery->file_stat!=NULL)
  {
    /* A file is being recovered: only go on if it was found by the fasttxt
     * or txt hint and its filename contains ".html" */
    if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
	file_recovery->file_stat->file_hint == &file_hint_txt)
    {
      if(strstr(file_recovery->filename,".html")==NULL)
	return 0;
    }
    else
      return 0;
  }
  /* Drop the cached buffer when it is too small for this call */
  if(buffer_lower_size<buffer_size_test+16)
  {
    free(buffer_lower);
    buffer_lower=NULL;
  }
  /* Don't malloc/free memory every time, small memory leak */
  if(buffer_lower==NULL)
  {
    buffer_lower_size=buffer_size_test+16;
    buffer_lower=(char *)MALLOC(buffer_lower_size);
  }
  /* NOTE(review): buffer_lower is used below with strstr(), so UTF2Lat() is
   * presumably expected to produce a NUL-terminated (and, given the lower-case
   * keywords searched, lower-cased) string - confirm against UTF2Lat() */
  l=UTF2Lat((unsigned char*)buffer_lower, buffer, buffer_size_test);
  if(l<10)
    return 0;
  {
    unsigned int line_nbr=0;
    unsigned int i;
    for(i=0; i<512 && i<l; i++)
    {
      if(buffer[i]=='\n')
	line_nbr++;
    }
    /* A text file must contain at least one newline in this range */
    if(line_nbr==0)
      return 0;
  }
  if(strncasecmp((const char *)buffer, "rem ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    /* Dos/Windows batch */
    file_recovery_new->extension="bat";
    return 1;
  }
  if(strncasecmp((const char *)buffer, "dn: ", 4)==0)
  {
    reset_file_recovery(file_recovery_new);
    file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension="ldif";
    return 1;
  }
  {
    /* Extension chosen by the heuristics, NULL when the data is rejected */
    const char *ext=NULL;
    /* ind=~0: random
     * ind=~1: constant */
    double ind;
    /* Number of "\n " occurrences, used by the Fortran test */
    unsigned int nbrf=0;
    /* Stays non-zero while the data still looks like csv */
    unsigned int is_csv=1;
    char *str;
    /* Detect Fortran */
    {
      str=buffer_lower;
      while((str=strstr(str, "\n "))!=NULL)
      {
	nbrf++;
	str++;
      }
    }
    /* Detect csv */
    {
      /* Number of ';' on the current line */
      unsigned int csv_per_line_current=0;
      /* Number of ';' on the first line, every line must match it */
      unsigned int csv_per_line=0;
      unsigned int line_nbr=0;
      unsigned int i;
      for(i=0;i<l && is_csv>0;i++)
      {
	if(buffer_lower[i]==';')
	{
	  csv_per_line_current++;
	}
	else if(buffer_lower[i]=='\n')
	{
	  if(line_nbr==0)
	    csv_per_line=csv_per_line_current;
	  if(csv_per_line_current!=csv_per_line)
	    is_csv=0;
	  line_nbr++;
	  csv_per_line_current=0;
	}
      }
      /* At least one separator per line and 10 complete lines are needed */
      if(csv_per_line<1 || line_nbr<10)
	is_csv=0;
    }
    /* if(l>1) */
    {
      /* Byte histogram of the converted text; ind is the probability that
       * two distinct positions hold the same byte value (l>=10 here, so the
       * division is safe) */
      unsigned int stats[256];
      unsigned int i;
      memset(&stats, 0, sizeof(stats));
      for(i=0;i<l;i++)
	stats[(unsigned char)buffer_lower[i]]++;
      ind=0;
      for(i=0;i<256;i++)
	if(stats[i]>0)
	  ind+=stats[i]*(stats[i]-1);
      ind=ind/l/(l-1);
    }
    /* The order of the tests below matters: the first match wins */
    /* Windows Autorun */
    if(strstr(buffer_lower, "[autorun]")!=NULL)
      ext="inf";
    /* Detect .ini */
    else if(buffer[0]=='[' && l>50 && is_ini(buffer_lower))
      ext="ini";
    /* php (Hypertext Preprocessor) script */
    else if(strstr(buffer_lower, "<?php")!=NULL)
      ext="php";
    /* Comma separated values */
    else if(is_csv>0)
      ext="csv";
    /* Detect LaTeX, C, PHP, JSP, ASP, HTML, C header */
    else if(strstr(buffer_lower, "\\begin{")!=NULL)
      ext="tex";
    else if(strstr(buffer_lower, "#include")!=NULL)
      ext="c";
    else if(l>20 && strstr(buffer_lower, "<%@")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<%=")!=NULL)
      ext="jsp";
    else if(l>20 && strstr(buffer_lower, "<% ")!=NULL)
      ext="asp";
    else if(strstr(buffer_lower, "<html")!=NULL)
      ext="html";
    else if(strstr(buffer_lower, "private static")!=NULL ||
	strstr(buffer_lower, "public interface")!=NULL)
    {
      /* Java; DJGPP builds use a three-letter extension */
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    else if(strstr(buffer_lower, "\nimport (")!=NULL)
    {
      ext="go";
    }
    else if((str=strstr(buffer_lower, "\nimport "))!=NULL)
    {
      /* An import line ending with ';' is taken as Java, otherwise Python */
      str+=8;
      while(*str!='\0' && *str!='\n' && *str!=';')
	str++;
      if(*str==';')
	ext="java";
      else
	ext="py";
    }
    else if(strstr(buffer_lower, "class ")!=NULL &&
	(l>=100 || file_recovery->file_stat==NULL))
    {
#ifdef DJGPP
      ext="jav";
#else
      ext="java";
#endif
    }
    /* Fortran */
    else if(nbrf>10 && ind<0.9 && strstr(buffer_lower, "integer")!=NULL)
      ext="f";
    /* LilyPond http://lilypond.org*/
    else if(strstr(buffer_lower, "\\score {")!=NULL)
      ext="ly";
    /* C header file */
    else if(strstr(buffer_lower, "/*")!=NULL && l>50)
      ext="h";
    /* Too short, or byte distribution too random/too constant: not text */
    else if(l<100 || ind<0.03 || ind>0.90)
      ext=NULL;
    /* JavaScript Object Notation */
    else if(memcmp(buffer_lower, "{\"", 2)==0)
      ext="json";
    else
      ext=file_hint_txt.extension;
    if(ext==NULL)
      return 0;
    /* Plain text holding <br> or <p> tags is saved as html */
    if(strcmp(ext,"txt")==0 &&
	(strstr(buffer_lower,"<br>")!=NULL || strstr(buffer_lower,"<p>")!=NULL))
    {
      ext="html";
    }
    if(file_recovery->file_stat!=NULL)
    {
      if(file_recovery->file_stat->file_hint == &file_hint_doc)
      {
	unsigned int i;
	unsigned int txt_nl=0;
	/* file_recovery->filename is .doc */
	if(ind>0.20)
	  return 0;
	/* Unix: \n (0xA)
	 * Dos: \r\n (0xD 0xA)
	 * Doc: \r (0xD) */
	/* A '\r' not followed by '\n' means the .doc file goes on */
	for(i=0; i<l-1; i++)
	{
	  if(buffer_lower[i]=='\r' && buffer_lower[i+1]!='\n')
	    return 0;
	}
	/* More than one newline is required in the first 512 bytes */
	for(i=0; i<l && i<512; i++)
	  if(buffer_lower[i]=='\n')
	    txt_nl++;
	if(txt_nl<=1)
	  return 0;
      }
      else if(file_recovery->file_stat->file_hint == &file_hint_fasttxt ||
	  file_recovery->file_stat->file_hint == &file_hint_txt)
      {
	/* file_recovery->filename is a .html */
	/* Only the first 511 bytes are searched for "<html".
	 * NOTE(review): this write assumes buffer_lower holds at least 512
	 * bytes, i.e. buffer_size>=496 - confirm against the callers */
	buffer_lower[511]='\0';
	if(strstr(buffer_lower, "<html")==NULL)
	  return 0;
	/* Special case: two consecutive HTML files */
      }
    }
    reset_file_recovery(file_recovery_new);
    if(strcmp(ext, "html")==0)
    {
      file_recovery_new->file_rename=&file_rename_html;
      file_recovery_new->data_check=&data_check_html;
    }
    else
      file_recovery_new->data_check=&data_check_txt;
    file_recovery_new->file_check=&file_check_size;
    file_recovery_new->extension=ext;
    return 1;
  }
}
1426
/*
 * Final check for SMIL files: search the closing "</smil>" tag with
 * file_search_footer(), then let file_allow_nl() handle a trailing
 * bare NL, CR LF or bare CR.
 */
static void file_check_smil(file_recovery_t *file_recovery)
{
  static const char smil_footer[]="</smil>";
  /* sizeof() counts the terminating NUL, the footer itself is 7 bytes */
  file_search_footer(file_recovery, smil_footer, sizeof(smil_footer)-1, 0);
  file_allow_nl(file_recovery, NL_BARENL|NL_CRLF|NL_BARECR);
}
1432
/*
 * Header check for Synchronized Multimedia Integration Language files
 * http://en.wikipedia.org/wiki/Synchronized_Multimedia_Integration_Language
 *
 * No additional test is done on the buffer: the header is always accepted.
 * Returns 1.
 */
static int header_check_smil(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="smil";
  file_recovery_new->file_check=&file_check_smil;
  file_recovery_new->data_check=&data_check_txt;
  return 1;
}
1443
/*
 * Header check for StereoLithography files in STL Ascii format
 * http://www.ennex.com/~fabbers/StL.asp
 *
 * The string "facet normal" must be present in the first 512 bytes of the
 * buffer (or in the whole buffer when it is shorter).
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_stl(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  unsigned int window=buffer_size;
  if(window > 512)
    window=512;
  if(td_memmem(buffer, window, "facet normal", 12)==NULL)
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="stl";
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
1457
/*
 * Header check for Scalable Vector Graphics files.
 *
 * No additional test is done on the buffer: the header is always accepted
 * and the file is later validated by file_check_svg. Returns 1.
 */
static int header_check_svg(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  reset_file_recovery(file_recovery_new);
  file_recovery_new->file_check=&file_check_svg;
  file_recovery_new->extension="svg";
  return 1;
}
1466
/*
 * Header check for .snz files.
 *
 * The string ".snz" must be present in the first 512 bytes of the buffer
 * (or in the whole buffer when it is shorter). Its offset is used as the
 * minimum file size.
 *
 * Returns 1 when the header is accepted, 0 otherwise.
 */
static int header_check_snz(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const unsigned char *marker;
  unsigned int window=buffer_size;
  if(window > 512)
    window=512;
  marker=(const unsigned char *)td_memmem(buffer, window, ".snz", 4);
  if(marker==NULL)
    return 0;
  reset_file_recovery(file_recovery_new);
  file_recovery_new->extension="snz";
  file_recovery_new->min_filesize=marker-buffer;
  file_recovery_new->data_check=&data_check_txt;
  file_recovery_new->file_check=&file_check_size;
  return 1;
}
1480
/*
 * Register the signatures handled by header_check_snz: "DEFAULT" at offset 0
 * followed by either a Unix or a Dos end of line.
 */
static void register_header_check_snz(file_stat_t *file_stat)
{
  static const struct
  {
    const void *magic;
    unsigned int length;
  } signatures[]={
    { "DEFAULT\n", 8 },
    { "DEFAULT\r\n", 9 }
  };
  unsigned int k;
  for(k=0; k<sizeof(signatures)/sizeof(signatures[0]); k++)
    register_header_check(0, signatures[k].magic, signatures[k].length, &header_check_snz, file_stat);
}
1486
/*
 * Register every signature of the fasttxt family.
 *
 * The entries of the fasttxt_headers table are registered first, all of them
 * with header_check_fasttxt, then the signatures of the table below, each
 * with its own header check, in the order they are listed.
 */
static void register_header_check_fasttxt(file_stat_t *file_stat)
{
  static const unsigned char header_xml_utf8[17] = {0xef, 0xbb, 0xbf, '<', '?', 'x', 'm', 'l', ' ', 'v', 'e', 'r', 's', 'i', 'o', 'n', '='};
  static const unsigned char header_xml_utf16[30] = {0xff, 0xfe, '<', 0, '?', 0, 'x', 0, 'm', 0, 'l', 0, ' ', 0, 'v', 0, 'e', 0, 'r', 0, 's', 0, 'i', 0, 'o', 0, 'n', 0, '=', 0};
  static const struct
  {
    unsigned int offset;
    const void *magic;
    unsigned int length;
    int (*check)(const unsigned char *, const unsigned int, const unsigned int, const file_recovery_t *, file_recovery_t *);
  } signatures[]={
    { 4, "SC V10", 6, &header_check_dc },
    { 0, "DatasetHeader Begin", 19, &header_check_ers },
    /* { 0, "\n<!DOCTYPE html", 15, &header_check_html }, */
    { 0, "<!DOCTYPE html", 14, &header_check_html },
    { 0, "<!DOCTYPE HTML", 14, &header_check_html },
    /* { 0, "<html", 5, &header_check_html }, */
    { 0, "BEGIN:VCALENDAR", 15, &header_check_ics },
    { 0, "From - ", 7, &header_check_thunderbird },
    { 0, "From ", 5, &header_check_mbox },
    { 0, "Message-ID: ", 12, &header_check_mbox },
    { 0, "MIME-Version:", 13, &header_check_mbox },
    { 0, "Received: from ", 15, &header_check_mbox },
    { 0, "Reply-To: ", 10, &header_check_mbox },
    { 0, "Return-path: ", 13, &header_check_mbox },
    { 0, "Return-Path: ", 13, &header_check_mbox },
    { 0, "package ", 8, &header_check_perlm },
    { 0, "package\t", 8, &header_check_perlm },
    { 0, "{\\rtf", 5, &header_check_rtf },
    { 0, "<smil>", 6, &header_check_smil },
    { 0, "solid ", 6, &header_check_stl },
    { 0, "<?xml version=", 14, &header_check_xml },
    { 0, header_xml_utf8, sizeof(header_xml_utf8), &header_check_xml_utf8 },
    { 0, header_xml_utf16, sizeof(header_xml_utf16), &header_check_xml_utf16 },
    /* Veeam Backup */
    { 0, "<BackupMeta Version=", 20, &header_check_vbm },
    /* TinyTag */
    { 0, "FF 09 FF FF FF FF FF FF FF FF FF FF FF FF FF FF FFFF 00", 55, &header_check_ttd },
    { 0, "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"", 35, &header_check_xmp },
    { 0, "<svg xmlns=\"http://www.w3.org/2000/svg\"", 39, &header_check_svg }
  };
  const txt_header_t *entry;
  unsigned int k;
  /* Signatures sharing the generic fasttxt header check */
  for(entry=&fasttxt_headers[0]; entry->len > 0; entry++)
    register_header_check(0, entry->string, entry->len, &header_check_fasttxt, file_stat);
  /* Signatures with a dedicated header check */
  for(k=0; k<sizeof(signatures)/sizeof(signatures[0]); k++)
    register_header_check(signatures[k].offset, signatures[k].magic, signatures[k].length, signatures[k].check, file_stat);
}
1527