1 /*
2      This file is part of libextractor.
3      Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
4 
5      libextractor is free software; you can redistribute it and/or modify
6      it under the terms of the GNU General Public License as published
7      by the Free Software Foundation; either version 3, or (at your
8      option) any later version.
9 
10      libextractor is distributed in the hope that it will be useful, but
11      WITHOUT ANY WARRANTY; without even the implied warranty of
12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13      General Public License for more details.
14 
15      You should have received a copy of the GNU General Public License
16      along with libextractor; see the file COPYING.  If not, write to the
17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18      Boston, MA 02110-1301, USA.
19 */
20 /**
21  * @file plugins/test_ole2.c
22  * @brief testcase for ole2 plugin
23  * @author Christian Grothoff
24  */
25 #include "platform.h"
26 #include "test_lib.h"
27 
28 
29 /**
30  * Main function for the OLE2 testcase.
31  *
32  * @param argc number of arguments (ignored)
33  * @param argv arguments (ignored)
34  * @return 0 on success
35  */
36 int
main(int argc,char * argv[])37 main (int argc, char *argv[])
38 {
39   struct SolutionData ole2_msword_sol[] = {
40     {
41       EXTRACTOR_METATYPE_CREATOR,
42       EXTRACTOR_METAFORMAT_UTF8,
43       "text/plain",
44       "Nils Durner",
45       strlen ("Nils Durner") + 1,
46       0
47     },
48     {
49       EXTRACTOR_METATYPE_UNKNOWN_DATE,
50       EXTRACTOR_METAFORMAT_UTF8,
51       "text/plain",
52       "2005-03-21T06:11:12Z",
53       strlen ("2005-03-21T06:11:12Z") + 1,
54       0
55     },
56     {
57       EXTRACTOR_METATYPE_DESCRIPTION,
58       EXTRACTOR_METAFORMAT_UTF8,
59       "text/plain",
60       "This is a small document to test meta data extraction by GNU libextractor.",
61       strlen (
62         "This is a small document to test meta data extraction by GNU libextractor.")
63       + 1,
64       0
65     },
66     {
67       EXTRACTOR_METATYPE_KEYWORDS,
68       EXTRACTOR_METAFORMAT_UTF8,
69       "text/plain",
70       "ole ole2 eole2extractor",
71       strlen ("ole ole2 eole2extractor") + 1,
72       0
73     },
74     {
75       EXTRACTOR_METATYPE_SUBJECT,
76       EXTRACTOR_METAFORMAT_UTF8,
77       "text/plain",
78       "GNU libextractor",
79       strlen ("GNU libextractor") + 1,
80       0
81     },
82     {
83       EXTRACTOR_METATYPE_TITLE,
84       EXTRACTOR_METAFORMAT_UTF8,
85       "text/plain",
86       "Testcase for the ole2 extractor",
87       strlen ("Testcase for the ole2 extractor") + 1,
88       0
89     },
90     {
91       EXTRACTOR_METATYPE_LAST_SAVED_BY,
92       EXTRACTOR_METAFORMAT_UTF8,
93       "text/plain",
94       "Nils Durner",
95       strlen ("Nils Durner") + 1,
96       0
97     },
98     {
99       EXTRACTOR_METATYPE_CREATION_DATE,
100       EXTRACTOR_METAFORMAT_UTF8,
101       "text/plain",
102       "2005-03-21T06:10:19Z",
103       strlen ("2005-03-21T06:10:19Z") + 1,
104       0
105     },
106     {
107       EXTRACTOR_METATYPE_EDITING_CYCLES,
108       EXTRACTOR_METAFORMAT_UTF8,
109       "text/plain",
110       "2",
111       strlen ("2") + 1,
112       0
113     },
114     { 0, 0, NULL, NULL, 0, -1 }
115   };
116 
117   struct SolutionData ole2_starwriter_sol[] = {
118     {
119       EXTRACTOR_METATYPE_CREATOR,
120       EXTRACTOR_METAFORMAT_UTF8,
121       "text/plain",
122       "Christian Grothoff",
123       strlen ("Christian Grothoff") + 1,
124       0
125     },
126     {
127       EXTRACTOR_METATYPE_UNKNOWN_DATE,
128       EXTRACTOR_METAFORMAT_UTF8,
129       "text/plain",
130       "2004-09-24T02:54:31Z",
131       strlen ("2004-09-24T02:54:31Z") + 1,
132       0
133     },
134     {
135       EXTRACTOR_METATYPE_DESCRIPTION,
136       EXTRACTOR_METAFORMAT_UTF8,
137       "text/plain",
138       "The comments",
139       strlen ("The comments") + 1,
140       0
141     },
142     {
143       EXTRACTOR_METATYPE_KEYWORDS,
144       EXTRACTOR_METAFORMAT_UTF8,
145       "text/plain",
146       "The Keywords",
147       strlen ("The Keywords") + 1,
148       0
149     },
150     {
151       EXTRACTOR_METATYPE_SUBJECT,
152       EXTRACTOR_METAFORMAT_UTF8,
153       "text/plain",
154       "The Subject",
155       strlen ("The Subject") + 1,
156       0
157     },
158     {
159       EXTRACTOR_METATYPE_TITLE,
160       EXTRACTOR_METAFORMAT_UTF8,
161       "text/plain",
162       "The Title",
163       strlen ("The Title") + 1,
164       0
165     },
166     {
167       EXTRACTOR_METATYPE_LAST_SAVED_BY,
168       EXTRACTOR_METAFORMAT_UTF8,
169       "text/plain",
170       "Christian Grothoff",
171       strlen ("Christian Grothoff") + 1,
172       0
173     },
174     {
175       EXTRACTOR_METATYPE_CREATION_DATE,
176       EXTRACTOR_METAFORMAT_UTF8,
177       "text/plain",
178       "2004-09-24T02:53:15Z",
179       strlen ("2004-09-24T02:53:15Z") + 1,
180       0
181     },
182     {
183       EXTRACTOR_METATYPE_EDITING_CYCLES,
184       EXTRACTOR_METAFORMAT_UTF8,
185       "text/plain",
186       "4",
187       strlen ("4") + 1,
188       0
189     },
190     {
191       EXTRACTOR_METATYPE_TITLE,
192       EXTRACTOR_METAFORMAT_UTF8,
193       "text/plain",
194       "The Title",
195       strlen ("The Title") + 1,
196       0
197     },
198     {
199       EXTRACTOR_METATYPE_SUBJECT,
200       EXTRACTOR_METAFORMAT_UTF8,
201       "text/plain",
202       "The Subject",
203       strlen ("The Subject") + 1,
204       0
205     },
206     {
207       EXTRACTOR_METATYPE_COMMENT,
208       EXTRACTOR_METAFORMAT_UTF8,
209       "text/plain",
210       "The comments",
211       strlen ("The comments") + 1,
212       0
213     },
214     {
215       EXTRACTOR_METATYPE_KEYWORDS,
216       EXTRACTOR_METAFORMAT_UTF8,
217       "text/plain",
218       "The Keywords",
219       strlen ("The Keywords") + 1,
220       0
221     },
222     { 0, 0, NULL, NULL, 0, -1 }
223   };
224 #if HAVE_ICONV
225   struct SolutionData ole2_blair_sol[] = {
226     {
227       EXTRACTOR_METATYPE_LANGUAGE,
228       EXTRACTOR_METAFORMAT_UTF8,
229       "text/plain",
230       "U.S. English",
231       strlen ("U.S. English") + 1,
232       0
233     },
234     {
235       EXTRACTOR_METATYPE_CREATOR,
236       EXTRACTOR_METAFORMAT_UTF8,
237       "text/plain",
238       "default",
239       strlen ("default") + 1,
240       0
241     },
242     {
243       EXTRACTOR_METATYPE_UNKNOWN_DATE,
244       EXTRACTOR_METAFORMAT_UTF8,
245       "text/plain",
246       "2003-02-03T11:18:00Z",
247       strlen ("2003-02-03T11:18:00Z") + 1,
248       0
249     },
250     {
251       EXTRACTOR_METATYPE_TITLE,
252       EXTRACTOR_METAFORMAT_UTF8,
253       "text/plain",
254       "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
255       strlen (
256         "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION")
257       + 1,
258       0
259     },
260     {
261       EXTRACTOR_METATYPE_CHARACTER_COUNT,
262       EXTRACTOR_METAFORMAT_UTF8,
263       "text/plain",
264       "22090",
265       strlen ("22090") + 1,
266       0
267     },
268     {
269       EXTRACTOR_METATYPE_LAST_SAVED_BY,
270       EXTRACTOR_METAFORMAT_UTF8,
271       "text/plain",
272       "MKhan",
273       strlen ("MKhan") + 1,
274       0
275     },
276     {
277       EXTRACTOR_METATYPE_PAGE_COUNT,
278       EXTRACTOR_METAFORMAT_UTF8,
279       "text/plain",
280       "1",
281       strlen ("1") + 1,
282       0
283     },
284     {
285       EXTRACTOR_METATYPE_WORD_COUNT,
286       EXTRACTOR_METAFORMAT_UTF8,
287       "text/plain",
288       "3875",
289       strlen ("3875") + 1,
290       0
291     },
292     {
293       EXTRACTOR_METATYPE_CREATION_DATE,
294       EXTRACTOR_METAFORMAT_UTF8,
295       "text/plain",
296       "2003-02-03T09:31:00Z",
297       strlen ("2003-02-03T09:31:00Z") + 1,
298       0
299     },
300     {
301       EXTRACTOR_METATYPE_EDITING_CYCLES,
302       EXTRACTOR_METAFORMAT_UTF8,
303       "text/plain",
304       "4",
305       strlen ("4") + 1,
306       0
307     },
308     {
309       EXTRACTOR_METATYPE_MIMETYPE,
310       EXTRACTOR_METAFORMAT_UTF8,
311       "text/plain",
312       "application/vnd.ms-files",
313       strlen ("application/vnd.ms-files") + 1,
314       0
315     },
316     {
317       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
318       EXTRACTOR_METAFORMAT_UTF8,
319       "text/plain",
320       "Microsoft Word 8.0",
321       strlen ("Microsoft Word 8.0") + 1,
322       0
323     },
324     {
325       EXTRACTOR_METATYPE_TEMPLATE,
326       EXTRACTOR_METAFORMAT_UTF8,
327       "text/plain",
328       "Normal.dot",
329       strlen ("Normal.dot") + 1,
330       0
331     },
332     {
333       EXTRACTOR_METATYPE_LINE_COUNT,
334       EXTRACTOR_METAFORMAT_UTF8,
335       "text/plain",
336       "184",
337       strlen ("184") + 1,
338       0
339     },
340     {
341       EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
342       EXTRACTOR_METAFORMAT_UTF8,
343       "text/plain",
344       "44",
345       strlen ("44") + 1,
346       0
347     },
348     {
349       EXTRACTOR_METATYPE_REVISION_HISTORY,
350       EXTRACTOR_METAFORMAT_UTF8,
351       "text/plain",
352       "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
353       strlen (
354         "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
355       + 1,
356       0
357     },
358     {
359       EXTRACTOR_METATYPE_REVISION_HISTORY,
360       EXTRACTOR_METAFORMAT_UTF8,
361       "text/plain",
362       "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
363       strlen (
364         "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
365       + 1,
366       0
367     },
368     {
369       EXTRACTOR_METATYPE_REVISION_HISTORY,
370       EXTRACTOR_METAFORMAT_UTF8,
371       "text/plain",
372       "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
373       strlen (
374         "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
375       + 1,
376       0
377     },
378     {
379       EXTRACTOR_METATYPE_REVISION_HISTORY,
380       EXTRACTOR_METAFORMAT_UTF8,
381       "text/plain",
382       "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'",
383       strlen (
384         "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'")
385       + 1,
386       0
387     },
388     {
389       EXTRACTOR_METATYPE_REVISION_HISTORY,
390       EXTRACTOR_METAFORMAT_UTF8,
391       "text/plain",
392       "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
393       strlen (
394         "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1,
395       0
396     },
397     {
398       EXTRACTOR_METATYPE_REVISION_HISTORY,
399       EXTRACTOR_METAFORMAT_UTF8,
400       "text/plain",
401       "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'",
402       strlen (
403         "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'")
404       + 1,
405       0
406     },
407     {
408       EXTRACTOR_METATYPE_REVISION_HISTORY,
409       EXTRACTOR_METAFORMAT_UTF8,
410       "text/plain",
411       "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'",
412       strlen (
413         "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'")
414       + 1,
415       0
416     },
417     {
418       EXTRACTOR_METATYPE_REVISION_HISTORY,
419       EXTRACTOR_METAFORMAT_UTF8,
420       "text/plain",
421       "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
422       strlen (
423         "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'")
424       + 1,
425       0
426     },
427     {
428       EXTRACTOR_METATYPE_REVISION_HISTORY,
429       EXTRACTOR_METAFORMAT_UTF8,
430       "text/plain",
431       "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
432       strlen (
433         "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'")
434       + 1,
435       0
436     },
437     {
438       EXTRACTOR_METATYPE_REVISION_HISTORY,
439       EXTRACTOR_METAFORMAT_UTF8,
440       "text/plain",
441       "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
442       strlen (
443         "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'")
444       + 1,
445       0
446     },
447     { 0, 0, NULL, NULL, 0, -1 }
448   };
449 #endif
450   struct SolutionData ole2_excel_sol[] = {
451     {
452       EXTRACTOR_METATYPE_CREATOR,
453       EXTRACTOR_METAFORMAT_UTF8,
454       "text/plain",
455       "JV",
456       strlen ("JV") + 1,
457       0
458     },
459     {
460       EXTRACTOR_METATYPE_LAST_SAVED_BY,
461       EXTRACTOR_METAFORMAT_UTF8,
462       "text/plain",
463       "JV",
464       strlen ("JV") + 1,
465       0
466     },
467     {
468       EXTRACTOR_METATYPE_CREATION_DATE,
469       EXTRACTOR_METAFORMAT_UTF8,
470       "text/plain",
471       "2002-03-20T21:26:28Z",
472       strlen ("2002-03-20T21:26:28Z") + 1,
473       0
474     },
475     {
476       EXTRACTOR_METATYPE_MIMETYPE,
477       EXTRACTOR_METAFORMAT_UTF8,
478       "text/plain",
479       "application/vnd.ms-files",
480       strlen ("application/vnd.ms-files") + 1,
481       0
482     },
483     {
484       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
485       EXTRACTOR_METAFORMAT_UTF8,
486       "text/plain",
487       "Microsoft Excel",
488       strlen ("Microsoft Excel") + 1,
489       0
490     },
491     { 0, 0, NULL, NULL, 0, -1 }
492   };
493 
494   struct ProblemSet ps[] = {
495     { "testdata/ole2_msword.doc",
496       ole2_msword_sol },
497     { "testdata/ole2_starwriter40.sdw",
498       ole2_starwriter_sol },
499 #if HAVE_ICONV
500     { "testdata/ole2_blair.doc",
501       ole2_blair_sol },
502 #endif
503     { "testdata/ole2_excel.xls",
504       ole2_excel_sol },
505     { NULL, NULL }
506   };
507   return ET_main ("ole2", ps);
508 }
509 
510 
511 /* end of test_ole2.c */
512