1 /*
2 This file is part of libextractor.
3 Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19 */
20 /**
21 * @file plugins/test_ole2.c
22 * @brief testcase for ole2 plugin
23 * @author Christian Grothoff
24 */
25 #include "platform.h"
26 #include "test_lib.h"
27
28
29 /**
30 * Main function for the OLE2 testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36 int
main(int argc,char * argv[])37 main (int argc, char *argv[])
38 {
39 struct SolutionData ole2_msword_sol[] = {
40 {
41 EXTRACTOR_METATYPE_CREATOR,
42 EXTRACTOR_METAFORMAT_UTF8,
43 "text/plain",
44 "Nils Durner",
45 strlen ("Nils Durner") + 1,
46 0
47 },
48 {
49 EXTRACTOR_METATYPE_UNKNOWN_DATE,
50 EXTRACTOR_METAFORMAT_UTF8,
51 "text/plain",
52 "2005-03-21T06:11:12Z",
53 strlen ("2005-03-21T06:11:12Z") + 1,
54 0
55 },
56 {
57 EXTRACTOR_METATYPE_DESCRIPTION,
58 EXTRACTOR_METAFORMAT_UTF8,
59 "text/plain",
60 "This is a small document to test meta data extraction by GNU libextractor.",
61 strlen (
62 "This is a small document to test meta data extraction by GNU libextractor.")
63 + 1,
64 0
65 },
66 {
67 EXTRACTOR_METATYPE_KEYWORDS,
68 EXTRACTOR_METAFORMAT_UTF8,
69 "text/plain",
70 "ole ole2 eole2extractor",
71 strlen ("ole ole2 eole2extractor") + 1,
72 0
73 },
74 {
75 EXTRACTOR_METATYPE_SUBJECT,
76 EXTRACTOR_METAFORMAT_UTF8,
77 "text/plain",
78 "GNU libextractor",
79 strlen ("GNU libextractor") + 1,
80 0
81 },
82 {
83 EXTRACTOR_METATYPE_TITLE,
84 EXTRACTOR_METAFORMAT_UTF8,
85 "text/plain",
86 "Testcase for the ole2 extractor",
87 strlen ("Testcase for the ole2 extractor") + 1,
88 0
89 },
90 {
91 EXTRACTOR_METATYPE_LAST_SAVED_BY,
92 EXTRACTOR_METAFORMAT_UTF8,
93 "text/plain",
94 "Nils Durner",
95 strlen ("Nils Durner") + 1,
96 0
97 },
98 {
99 EXTRACTOR_METATYPE_CREATION_DATE,
100 EXTRACTOR_METAFORMAT_UTF8,
101 "text/plain",
102 "2005-03-21T06:10:19Z",
103 strlen ("2005-03-21T06:10:19Z") + 1,
104 0
105 },
106 {
107 EXTRACTOR_METATYPE_EDITING_CYCLES,
108 EXTRACTOR_METAFORMAT_UTF8,
109 "text/plain",
110 "2",
111 strlen ("2") + 1,
112 0
113 },
114 { 0, 0, NULL, NULL, 0, -1 }
115 };
116
117 struct SolutionData ole2_starwriter_sol[] = {
118 {
119 EXTRACTOR_METATYPE_CREATOR,
120 EXTRACTOR_METAFORMAT_UTF8,
121 "text/plain",
122 "Christian Grothoff",
123 strlen ("Christian Grothoff") + 1,
124 0
125 },
126 {
127 EXTRACTOR_METATYPE_UNKNOWN_DATE,
128 EXTRACTOR_METAFORMAT_UTF8,
129 "text/plain",
130 "2004-09-24T02:54:31Z",
131 strlen ("2004-09-24T02:54:31Z") + 1,
132 0
133 },
134 {
135 EXTRACTOR_METATYPE_DESCRIPTION,
136 EXTRACTOR_METAFORMAT_UTF8,
137 "text/plain",
138 "The comments",
139 strlen ("The comments") + 1,
140 0
141 },
142 {
143 EXTRACTOR_METATYPE_KEYWORDS,
144 EXTRACTOR_METAFORMAT_UTF8,
145 "text/plain",
146 "The Keywords",
147 strlen ("The Keywords") + 1,
148 0
149 },
150 {
151 EXTRACTOR_METATYPE_SUBJECT,
152 EXTRACTOR_METAFORMAT_UTF8,
153 "text/plain",
154 "The Subject",
155 strlen ("The Subject") + 1,
156 0
157 },
158 {
159 EXTRACTOR_METATYPE_TITLE,
160 EXTRACTOR_METAFORMAT_UTF8,
161 "text/plain",
162 "The Title",
163 strlen ("The Title") + 1,
164 0
165 },
166 {
167 EXTRACTOR_METATYPE_LAST_SAVED_BY,
168 EXTRACTOR_METAFORMAT_UTF8,
169 "text/plain",
170 "Christian Grothoff",
171 strlen ("Christian Grothoff") + 1,
172 0
173 },
174 {
175 EXTRACTOR_METATYPE_CREATION_DATE,
176 EXTRACTOR_METAFORMAT_UTF8,
177 "text/plain",
178 "2004-09-24T02:53:15Z",
179 strlen ("2004-09-24T02:53:15Z") + 1,
180 0
181 },
182 {
183 EXTRACTOR_METATYPE_EDITING_CYCLES,
184 EXTRACTOR_METAFORMAT_UTF8,
185 "text/plain",
186 "4",
187 strlen ("4") + 1,
188 0
189 },
190 {
191 EXTRACTOR_METATYPE_TITLE,
192 EXTRACTOR_METAFORMAT_UTF8,
193 "text/plain",
194 "The Title",
195 strlen ("The Title") + 1,
196 0
197 },
198 {
199 EXTRACTOR_METATYPE_SUBJECT,
200 EXTRACTOR_METAFORMAT_UTF8,
201 "text/plain",
202 "The Subject",
203 strlen ("The Subject") + 1,
204 0
205 },
206 {
207 EXTRACTOR_METATYPE_COMMENT,
208 EXTRACTOR_METAFORMAT_UTF8,
209 "text/plain",
210 "The comments",
211 strlen ("The comments") + 1,
212 0
213 },
214 {
215 EXTRACTOR_METATYPE_KEYWORDS,
216 EXTRACTOR_METAFORMAT_UTF8,
217 "text/plain",
218 "The Keywords",
219 strlen ("The Keywords") + 1,
220 0
221 },
222 { 0, 0, NULL, NULL, 0, -1 }
223 };
224 #if HAVE_ICONV
225 struct SolutionData ole2_blair_sol[] = {
226 {
227 EXTRACTOR_METATYPE_LANGUAGE,
228 EXTRACTOR_METAFORMAT_UTF8,
229 "text/plain",
230 "U.S. English",
231 strlen ("U.S. English") + 1,
232 0
233 },
234 {
235 EXTRACTOR_METATYPE_CREATOR,
236 EXTRACTOR_METAFORMAT_UTF8,
237 "text/plain",
238 "default",
239 strlen ("default") + 1,
240 0
241 },
242 {
243 EXTRACTOR_METATYPE_UNKNOWN_DATE,
244 EXTRACTOR_METAFORMAT_UTF8,
245 "text/plain",
246 "2003-02-03T11:18:00Z",
247 strlen ("2003-02-03T11:18:00Z") + 1,
248 0
249 },
250 {
251 EXTRACTOR_METATYPE_TITLE,
252 EXTRACTOR_METAFORMAT_UTF8,
253 "text/plain",
254 "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
255 strlen (
256 "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION")
257 + 1,
258 0
259 },
260 {
261 EXTRACTOR_METATYPE_CHARACTER_COUNT,
262 EXTRACTOR_METAFORMAT_UTF8,
263 "text/plain",
264 "22090",
265 strlen ("22090") + 1,
266 0
267 },
268 {
269 EXTRACTOR_METATYPE_LAST_SAVED_BY,
270 EXTRACTOR_METAFORMAT_UTF8,
271 "text/plain",
272 "MKhan",
273 strlen ("MKhan") + 1,
274 0
275 },
276 {
277 EXTRACTOR_METATYPE_PAGE_COUNT,
278 EXTRACTOR_METAFORMAT_UTF8,
279 "text/plain",
280 "1",
281 strlen ("1") + 1,
282 0
283 },
284 {
285 EXTRACTOR_METATYPE_WORD_COUNT,
286 EXTRACTOR_METAFORMAT_UTF8,
287 "text/plain",
288 "3875",
289 strlen ("3875") + 1,
290 0
291 },
292 {
293 EXTRACTOR_METATYPE_CREATION_DATE,
294 EXTRACTOR_METAFORMAT_UTF8,
295 "text/plain",
296 "2003-02-03T09:31:00Z",
297 strlen ("2003-02-03T09:31:00Z") + 1,
298 0
299 },
300 {
301 EXTRACTOR_METATYPE_EDITING_CYCLES,
302 EXTRACTOR_METAFORMAT_UTF8,
303 "text/plain",
304 "4",
305 strlen ("4") + 1,
306 0
307 },
308 {
309 EXTRACTOR_METATYPE_MIMETYPE,
310 EXTRACTOR_METAFORMAT_UTF8,
311 "text/plain",
312 "application/vnd.ms-files",
313 strlen ("application/vnd.ms-files") + 1,
314 0
315 },
316 {
317 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
318 EXTRACTOR_METAFORMAT_UTF8,
319 "text/plain",
320 "Microsoft Word 8.0",
321 strlen ("Microsoft Word 8.0") + 1,
322 0
323 },
324 {
325 EXTRACTOR_METATYPE_TEMPLATE,
326 EXTRACTOR_METAFORMAT_UTF8,
327 "text/plain",
328 "Normal.dot",
329 strlen ("Normal.dot") + 1,
330 0
331 },
332 {
333 EXTRACTOR_METATYPE_LINE_COUNT,
334 EXTRACTOR_METAFORMAT_UTF8,
335 "text/plain",
336 "184",
337 strlen ("184") + 1,
338 0
339 },
340 {
341 EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
342 EXTRACTOR_METAFORMAT_UTF8,
343 "text/plain",
344 "44",
345 strlen ("44") + 1,
346 0
347 },
348 {
349 EXTRACTOR_METATYPE_REVISION_HISTORY,
350 EXTRACTOR_METAFORMAT_UTF8,
351 "text/plain",
352 "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
353 strlen (
354 "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
355 + 1,
356 0
357 },
358 {
359 EXTRACTOR_METATYPE_REVISION_HISTORY,
360 EXTRACTOR_METAFORMAT_UTF8,
361 "text/plain",
362 "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
363 strlen (
364 "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
365 + 1,
366 0
367 },
368 {
369 EXTRACTOR_METATYPE_REVISION_HISTORY,
370 EXTRACTOR_METAFORMAT_UTF8,
371 "text/plain",
372 "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
373 strlen (
374 "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
375 + 1,
376 0
377 },
378 {
379 EXTRACTOR_METATYPE_REVISION_HISTORY,
380 EXTRACTOR_METAFORMAT_UTF8,
381 "text/plain",
382 "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'",
383 strlen (
384 "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'")
385 + 1,
386 0
387 },
388 {
389 EXTRACTOR_METATYPE_REVISION_HISTORY,
390 EXTRACTOR_METAFORMAT_UTF8,
391 "text/plain",
392 "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
393 strlen (
394 "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1,
395 0
396 },
397 {
398 EXTRACTOR_METATYPE_REVISION_HISTORY,
399 EXTRACTOR_METAFORMAT_UTF8,
400 "text/plain",
401 "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'",
402 strlen (
403 "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'")
404 + 1,
405 0
406 },
407 {
408 EXTRACTOR_METATYPE_REVISION_HISTORY,
409 EXTRACTOR_METAFORMAT_UTF8,
410 "text/plain",
411 "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'",
412 strlen (
413 "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'")
414 + 1,
415 0
416 },
417 {
418 EXTRACTOR_METATYPE_REVISION_HISTORY,
419 EXTRACTOR_METAFORMAT_UTF8,
420 "text/plain",
421 "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
422 strlen (
423 "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'")
424 + 1,
425 0
426 },
427 {
428 EXTRACTOR_METATYPE_REVISION_HISTORY,
429 EXTRACTOR_METAFORMAT_UTF8,
430 "text/plain",
431 "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
432 strlen (
433 "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'")
434 + 1,
435 0
436 },
437 {
438 EXTRACTOR_METATYPE_REVISION_HISTORY,
439 EXTRACTOR_METAFORMAT_UTF8,
440 "text/plain",
441 "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
442 strlen (
443 "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'")
444 + 1,
445 0
446 },
447 { 0, 0, NULL, NULL, 0, -1 }
448 };
449 #endif
450 struct SolutionData ole2_excel_sol[] = {
451 {
452 EXTRACTOR_METATYPE_CREATOR,
453 EXTRACTOR_METAFORMAT_UTF8,
454 "text/plain",
455 "JV",
456 strlen ("JV") + 1,
457 0
458 },
459 {
460 EXTRACTOR_METATYPE_LAST_SAVED_BY,
461 EXTRACTOR_METAFORMAT_UTF8,
462 "text/plain",
463 "JV",
464 strlen ("JV") + 1,
465 0
466 },
467 {
468 EXTRACTOR_METATYPE_CREATION_DATE,
469 EXTRACTOR_METAFORMAT_UTF8,
470 "text/plain",
471 "2002-03-20T21:26:28Z",
472 strlen ("2002-03-20T21:26:28Z") + 1,
473 0
474 },
475 {
476 EXTRACTOR_METATYPE_MIMETYPE,
477 EXTRACTOR_METAFORMAT_UTF8,
478 "text/plain",
479 "application/vnd.ms-files",
480 strlen ("application/vnd.ms-files") + 1,
481 0
482 },
483 {
484 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
485 EXTRACTOR_METAFORMAT_UTF8,
486 "text/plain",
487 "Microsoft Excel",
488 strlen ("Microsoft Excel") + 1,
489 0
490 },
491 { 0, 0, NULL, NULL, 0, -1 }
492 };
493
494 struct ProblemSet ps[] = {
495 { "testdata/ole2_msword.doc",
496 ole2_msword_sol },
497 { "testdata/ole2_starwriter40.sdw",
498 ole2_starwriter_sol },
499 #if HAVE_ICONV
500 { "testdata/ole2_blair.doc",
501 ole2_blair_sol },
502 #endif
503 { "testdata/ole2_excel.xls",
504 ole2_excel_sol },
505 { NULL, NULL }
506 };
507 return ET_main ("ole2", ps);
508 }
509
510
511 /* end of test_ole2.c */
512