/*
     This file is part of libextractor.
     Copyright (C) 2002-2017 Vidyut Samanta and Christian Grothoff

     libextractor is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 3, or (at your
     option) any later version.

     libextractor is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with libextractor; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     Boston, MA 02110-1301, USA.
 */

21 #ifndef EXTRACTOR_H
22 #define EXTRACTOR_H
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #if 0 /* keep Emacsens' auto-indent happy */
27 }
28 #endif
29 #endif
30 
31 
32 #include <stdint.h>
33 
/**
 * Version number, packed with one byte per component as shown by
 * these examples:
 * 0.2.6-1 => 0x00020601
 * 4.5.2-0 => 0x04050200
 *
 * Read with the same scheme, the value below corresponds to 1.11.0-0.
 */
#define EXTRACTOR_VERSION 0x010B0000

#include <stdio.h>
#include <sys/types.h> /* ssize_t, used by struct EXTRACTOR_ExtractContext */

/**
 * Linkage specifier placed in front of every function declared in this
 * header.  A definition supplied before this header is included takes
 * precedence.  Otherwise the macro expands to `__declspec(dllimport)`
 * when compiling for Windows (_WIN32) with MHD_W32DLL defined and
 * MHD_W32LIB not defined, and to plain `extern` in every other case.
 *
 * NOTE(review): the MHD_ prefix of the two switches looks as if it was
 * carried over from another project -- confirm that these are the
 * intended names before relying on them.
 */
#ifndef _EXTRACTOR_EXTERN
#if defined(_WIN32) && defined(MHD_W32DLL) && ! defined(MHD_W32LIB)
/* Importing from the .DLL explicitly speeds up the linker a little. */
#define _EXTRACTOR_EXTERN __declspec(dllimport)
#else
#define _EXTRACTOR_EXTERN extern
#endif
#endif

/**
 * Options for how plugin execution should be done.
 */
enum EXTRACTOR_Options
{

  /**
   * Default policy: run the plugin out-of-process, starting the
   * process once the plugin is to be run.  If the plugin crashes, the
   * respective process is automatically restarted for the same file
   * and tried once more (since the crash may have been caused by the
   * previous file).  If the process crashes immediately again, it is
   * not restarted until the next file.
   */
  EXTRACTOR_OPTION_DEFAULT_POLICY = 0,

  /**
   * Deprecated option.  Ignored.
   */
  EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1,

  /**
   * Run plugins in-process.  Unsafe and not recommended, but
   * can be nice for debugging.
   */
  EXTRACTOR_OPTION_IN_PROCESS = 2,

  /**
   * Internal value for plugins that have been disabled.
   */
  EXTRACTOR_OPTION_DISABLED = 3

};


/**
 * Format in which the extracted meta data is presented.
 */
enum EXTRACTOR_MetaFormat
{
  /**
   * Format is unknown.
   */
  EXTRACTOR_METAFORMAT_UNKNOWN = 0,

  /**
   * 0-terminated, UTF-8 encoded string.  "data_len"
   * is strlen (data)+1, i.e. it counts the terminator.
   */
  EXTRACTOR_METAFORMAT_UTF8 = 1,

  /**
   * Some kind of binary format, see given Mime type.
   */
  EXTRACTOR_METAFORMAT_BINARY = 2,

  /**
   * 0-terminated string.  The specific encoding is unknown.
   * "data_len" is strlen (data)+1, i.e. it counts the terminator.
   */
  EXTRACTOR_METAFORMAT_C_STRING = 3

};


/**
 * Enumeration defining various sources of keywords.  See also
 * http://dublincore.org/documents/1998/09/dces/
 *
 * Every enumerator is given an explicit value; the values run
 * without gaps from 0 up to #EXTRACTOR_METATYPE_LAST.
 *
 * @defgroup types meta data types
 * @{
 */
enum EXTRACTOR_MetaType
{
  /* fundamental types */
  EXTRACTOR_METATYPE_RESERVED = 0,
  EXTRACTOR_METATYPE_MIMETYPE = 1,
  EXTRACTOR_METATYPE_FILENAME = 2,
  EXTRACTOR_METATYPE_COMMENT = 3,

  /* Standard types from bibtex */
  EXTRACTOR_METATYPE_TITLE = 4,
  EXTRACTOR_METATYPE_BOOK_TITLE = 5,
  EXTRACTOR_METATYPE_BOOK_EDITION = 6,
  EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER = 7,
  EXTRACTOR_METATYPE_JOURNAL_NAME = 8,
  EXTRACTOR_METATYPE_JOURNAL_VOLUME = 9,
  EXTRACTOR_METATYPE_JOURNAL_NUMBER = 10,
  EXTRACTOR_METATYPE_PAGE_COUNT = 11,
  EXTRACTOR_METATYPE_PAGE_RANGE = 12,
  EXTRACTOR_METATYPE_AUTHOR_NAME = 13,
  EXTRACTOR_METATYPE_AUTHOR_EMAIL = 14,
  EXTRACTOR_METATYPE_AUTHOR_INSTITUTION = 15,
  EXTRACTOR_METATYPE_PUBLISHER = 16,
  EXTRACTOR_METATYPE_PUBLISHER_ADDRESS = 17,
  EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION = 18,
  EXTRACTOR_METATYPE_PUBLISHER_SERIES = 19,
  EXTRACTOR_METATYPE_PUBLICATION_TYPE = 20,
  EXTRACTOR_METATYPE_PUBLICATION_YEAR = 21,
  EXTRACTOR_METATYPE_PUBLICATION_MONTH = 22,
  EXTRACTOR_METATYPE_PUBLICATION_DAY = 23,
  EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
  EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
  EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
  EXTRACTOR_METATYPE_LANGUAGE = 27,
  EXTRACTOR_METATYPE_CREATION_TIME = 28,
  EXTRACTOR_METATYPE_URL = 29,

  /* "unique" document identifiers */
  EXTRACTOR_METATYPE_URI = 30,
  EXTRACTOR_METATYPE_ISRC = 31,
  EXTRACTOR_METATYPE_HASH_MD4 = 32,
  EXTRACTOR_METATYPE_HASH_MD5 = 33,
  EXTRACTOR_METATYPE_HASH_SHA0 = 34,
  EXTRACTOR_METATYPE_HASH_SHA1 = 35,
  EXTRACTOR_METATYPE_HASH_RMD160 = 36,

  /* identifiers of a location */
  EXTRACTOR_METATYPE_GPS_LATITUDE_REF = 37,
  EXTRACTOR_METATYPE_GPS_LATITUDE = 38,
  EXTRACTOR_METATYPE_GPS_LONGITUDE_REF = 39,
  EXTRACTOR_METATYPE_GPS_LONGITUDE = 40,
  EXTRACTOR_METATYPE_LOCATION_CITY = 41,
  EXTRACTOR_METATYPE_LOCATION_SUBLOCATION = 42,
  EXTRACTOR_METATYPE_LOCATION_COUNTRY = 43,
  EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE = 44,

  /* generic attributes */
  EXTRACTOR_METATYPE_UNKNOWN = 45,
  EXTRACTOR_METATYPE_DESCRIPTION = 46,
  EXTRACTOR_METATYPE_COPYRIGHT = 47,
  EXTRACTOR_METATYPE_RIGHTS = 48,
  EXTRACTOR_METATYPE_KEYWORDS = 49,
  EXTRACTOR_METATYPE_ABSTRACT = 50,
  EXTRACTOR_METATYPE_SUMMARY = 51,
  EXTRACTOR_METATYPE_SUBJECT = 52,
  EXTRACTOR_METATYPE_CREATOR = 53,
  EXTRACTOR_METATYPE_FORMAT = 54,
  EXTRACTOR_METATYPE_FORMAT_VERSION = 55,

  /* processing history */
  EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE = 56,
  EXTRACTOR_METATYPE_UNKNOWN_DATE = 57,
  EXTRACTOR_METATYPE_CREATION_DATE = 58,
  EXTRACTOR_METATYPE_MODIFICATION_DATE = 59,
  EXTRACTOR_METATYPE_LAST_PRINTED = 60,
  EXTRACTOR_METATYPE_LAST_SAVED_BY = 61,
  EXTRACTOR_METATYPE_TOTAL_EDITING_TIME = 62,
  EXTRACTOR_METATYPE_EDITING_CYCLES = 63,
  EXTRACTOR_METATYPE_MODIFIED_BY_SOFTWARE = 64,
  EXTRACTOR_METATYPE_REVISION_HISTORY = 65,

  EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE = 66,
  EXTRACTOR_METATYPE_FINDER_FILE_TYPE = 67,
  EXTRACTOR_METATYPE_FINDER_FILE_CREATOR = 68,

  /* software package specifics (deb, rpm, tgz, elf) */
  EXTRACTOR_METATYPE_PACKAGE_NAME = 69,
  EXTRACTOR_METATYPE_PACKAGE_VERSION = 70,
  EXTRACTOR_METATYPE_SECTION = 71,
  EXTRACTOR_METATYPE_UPLOAD_PRIORITY = 72,
  EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY = 73,
  EXTRACTOR_METATYPE_PACKAGE_CONFLICTS = 74,
  EXTRACTOR_METATYPE_PACKAGE_REPLACES = 75,
  EXTRACTOR_METATYPE_PACKAGE_PROVIDES = 76,
  EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS = 77,
  EXTRACTOR_METATYPE_PACKAGE_SUGGESTS = 78,
  EXTRACTOR_METATYPE_PACKAGE_MAINTAINER = 79,
  EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE = 80,
  EXTRACTOR_METATYPE_PACKAGE_SOURCE = 81,
  EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL = 82,
  EXTRACTOR_METATYPE_TARGET_ARCHITECTURE = 83,
  EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY = 84,
  EXTRACTOR_METATYPE_LICENSE = 85,
  EXTRACTOR_METATYPE_PACKAGE_DISTRIBUTION = 86,
  EXTRACTOR_METATYPE_BUILDHOST = 87,
  EXTRACTOR_METATYPE_VENDOR = 88,
  EXTRACTOR_METATYPE_TARGET_OS = 89,
  EXTRACTOR_METATYPE_SOFTWARE_VERSION = 90,
  EXTRACTOR_METATYPE_TARGET_PLATFORM = 91,
  EXTRACTOR_METATYPE_RESOURCE_TYPE = 92,
  EXTRACTOR_METATYPE_LIBRARY_SEARCH_PATH = 93,
  EXTRACTOR_METATYPE_LIBRARY_DEPENDENCY = 94,

  /* photography specifics */
  EXTRACTOR_METATYPE_CAMERA_MAKE = 95,
  EXTRACTOR_METATYPE_CAMERA_MODEL = 96,
  EXTRACTOR_METATYPE_EXPOSURE = 97,
  EXTRACTOR_METATYPE_APERTURE = 98,
  EXTRACTOR_METATYPE_EXPOSURE_BIAS = 99,
  EXTRACTOR_METATYPE_FLASH = 100,
  EXTRACTOR_METATYPE_FLASH_BIAS = 101,
  EXTRACTOR_METATYPE_FOCAL_LENGTH = 102,
  EXTRACTOR_METATYPE_FOCAL_LENGTH_35MM = 103,
  EXTRACTOR_METATYPE_ISO_SPEED = 104,
  EXTRACTOR_METATYPE_EXPOSURE_MODE = 105,
  EXTRACTOR_METATYPE_METERING_MODE = 106,
  EXTRACTOR_METATYPE_MACRO_MODE = 107,
  EXTRACTOR_METATYPE_IMAGE_QUALITY = 108,
  EXTRACTOR_METATYPE_WHITE_BALANCE = 109,
  EXTRACTOR_METATYPE_ORIENTATION = 110,
  EXTRACTOR_METATYPE_MAGNIFICATION = 111,

  /* image specifics */
  EXTRACTOR_METATYPE_IMAGE_DIMENSIONS = 112,
  EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE = 113,
  EXTRACTOR_METATYPE_THUMBNAIL = 114,
  EXTRACTOR_METATYPE_IMAGE_RESOLUTION = 115,
  EXTRACTOR_METATYPE_SOURCE = 116,

  /* (text) document processing specifics */
  EXTRACTOR_METATYPE_CHARACTER_SET = 117,
  EXTRACTOR_METATYPE_LINE_COUNT = 118,
  EXTRACTOR_METATYPE_PARAGRAPH_COUNT = 119,
  EXTRACTOR_METATYPE_WORD_COUNT = 120,
  EXTRACTOR_METATYPE_CHARACTER_COUNT = 121,
  EXTRACTOR_METATYPE_PAGE_ORIENTATION = 122,
  EXTRACTOR_METATYPE_PAPER_SIZE = 123,
  EXTRACTOR_METATYPE_TEMPLATE = 124,
  EXTRACTOR_METATYPE_COMPANY = 125,
  EXTRACTOR_METATYPE_MANAGER = 126,
  EXTRACTOR_METATYPE_REVISION_NUMBER = 127,

  /* music / video specifics */
  EXTRACTOR_METATYPE_DURATION = 128,
  EXTRACTOR_METATYPE_ALBUM = 129,
  EXTRACTOR_METATYPE_ARTIST = 130,
  EXTRACTOR_METATYPE_GENRE = 131,
  EXTRACTOR_METATYPE_TRACK_NUMBER = 132,
  EXTRACTOR_METATYPE_DISC_NUMBER = 133,
  EXTRACTOR_METATYPE_PERFORMER = 134,
  EXTRACTOR_METATYPE_CONTACT_INFORMATION = 135,
  EXTRACTOR_METATYPE_SONG_VERSION = 136,
  EXTRACTOR_METATYPE_PICTURE = 137,
  EXTRACTOR_METATYPE_COVER_PICTURE = 138,
  EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE = 139,
  EXTRACTOR_METATYPE_EVENT_PICTURE = 140,
  EXTRACTOR_METATYPE_LOGO = 141,
  EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM = 142,
  EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
  EXTRACTOR_METATYPE_DISCLAIMER = 144,
  EXTRACTOR_METATYPE_WARNING = 145,
  EXTRACTOR_METATYPE_PAGE_ORDER = 146,
  EXTRACTOR_METATYPE_WRITER = 147,
  EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
  EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
  EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
  EXTRACTOR_METATYPE_NETWORK_NAME = 151,
  EXTRACTOR_METATYPE_SHOW_NAME = 152,
  EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
  EXTRACTOR_METATYPE_SONG_COUNT = 154,
  EXTRACTOR_METATYPE_STARTING_SONG = 155,
  EXTRACTOR_METATYPE_PLAY_COUNTER = 156,
  EXTRACTOR_METATYPE_CONDUCTOR = 157,
  EXTRACTOR_METATYPE_INTERPRETATION = 158,
  EXTRACTOR_METATYPE_COMPOSER = 159,
  EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160,
  EXTRACTOR_METATYPE_ENCODED_BY = 161,
  EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162,
  EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
  EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
  EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
  EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
  EXTRACTOR_METATYPE_LYRICS = 167,
  EXTRACTOR_METATYPE_POPULARITY_METER = 168,
  EXTRACTOR_METATYPE_LICENSEE = 169,
  EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
  EXTRACTOR_METATYPE_MOOD = 171,
  EXTRACTOR_METATYPE_SUBTITLE = 172,

  /* GNUnet specific values (never extracted) */
  /* NOTE(review): only the GNUNET_-prefixed entries below are obviously
     GNUnet specific; the others under this heading may just be later
     additions -- confirm before relying on the heading. */
  EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173,
  EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174,
  EXTRACTOR_METATYPE_RATING = 175,
  EXTRACTOR_METATYPE_ORGANIZATION = 176,
  EXTRACTOR_METATYPE_RIPPER = 177,
  EXTRACTOR_METATYPE_PRODUCER = 178,
  EXTRACTOR_METATYPE_GROUP = 179,
  EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME = 180,

  EXTRACTOR_METATYPE_DISC_COUNT = 181,

  EXTRACTOR_METATYPE_CODEC = 182,
  EXTRACTOR_METATYPE_VIDEO_CODEC = 183,
  EXTRACTOR_METATYPE_AUDIO_CODEC = 184,
  EXTRACTOR_METATYPE_SUBTITLE_CODEC = 185,

  EXTRACTOR_METATYPE_CONTAINER_FORMAT = 186,

  EXTRACTOR_METATYPE_BITRATE = 187,
  EXTRACTOR_METATYPE_NOMINAL_BITRATE = 188,
  EXTRACTOR_METATYPE_MINIMUM_BITRATE = 189,
  EXTRACTOR_METATYPE_MAXIMUM_BITRATE = 190,

  EXTRACTOR_METATYPE_SERIAL = 191,

  EXTRACTOR_METATYPE_ENCODER = 192,
  EXTRACTOR_METATYPE_ENCODER_VERSION = 193,

  EXTRACTOR_METATYPE_TRACK_GAIN = 194,
  EXTRACTOR_METATYPE_TRACK_PEAK = 195,
  EXTRACTOR_METATYPE_ALBUM_GAIN = 196,
  EXTRACTOR_METATYPE_ALBUM_PEAK = 197,
  EXTRACTOR_METATYPE_REFERENCE_LEVEL = 198,

  EXTRACTOR_METATYPE_LOCATION_NAME = 199,
  EXTRACTOR_METATYPE_LOCATION_ELEVATION = 200,
  EXTRACTOR_METATYPE_LOCATION_HORIZONTAL_ERROR = 201,
  EXTRACTOR_METATYPE_LOCATION_MOVEMENT_SPEED = 202,
  EXTRACTOR_METATYPE_LOCATION_MOVEMENT_DIRECTION = 203,
  EXTRACTOR_METATYPE_LOCATION_CAPTURE_DIRECTION = 204,

  EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER = 205,
  EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER = 206,

  EXTRACTOR_METATYPE_GROUPING = 207,

  EXTRACTOR_METATYPE_DEVICE_MANUFACTURER = 208,
  EXTRACTOR_METATYPE_DEVICE_MODEL = 209,

  EXTRACTOR_METATYPE_AUDIO_LANGUAGE = 210,
  EXTRACTOR_METATYPE_CHANNELS = 211,
  EXTRACTOR_METATYPE_SAMPLE_RATE = 212,
  EXTRACTOR_METATYPE_AUDIO_DEPTH = 213,
  EXTRACTOR_METATYPE_AUDIO_BITRATE = 214,
  EXTRACTOR_METATYPE_MAXIMUM_AUDIO_BITRATE = 215,

  EXTRACTOR_METATYPE_VIDEO_DIMENSIONS = 216,
  EXTRACTOR_METATYPE_VIDEO_DEPTH = 217,
  EXTRACTOR_METATYPE_FRAME_RATE = 218,
  EXTRACTOR_METATYPE_PIXEL_ASPECT_RATIO = 219,
  EXTRACTOR_METATYPE_VIDEO_BITRATE = 220,
  EXTRACTOR_METATYPE_MAXIMUM_VIDEO_BITRATE = 221,

  EXTRACTOR_METATYPE_SUBTITLE_LANGUAGE = 222,
  EXTRACTOR_METATYPE_VIDEO_LANGUAGE = 223,

  EXTRACTOR_METATYPE_TOC = 224,

  EXTRACTOR_METATYPE_VIDEO_DURATION = 225,
  EXTRACTOR_METATYPE_AUDIO_DURATION = 226,
  EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227,

  EXTRACTOR_METATYPE_AUDIO_PREVIEW = 228,

  EXTRACTOR_METATYPE_NARINFO = 229,
  EXTRACTOR_METATYPE_NAR = 230,

  /* One greater than the largest value assigned above.
     NOTE(review): presumably the bound that EXTRACTOR_metatype_get_max()
     reports -- confirm against the implementation. */
  EXTRACTOR_METATYPE_LAST = 231
};

/** @} */ /* end of meta data types */

406 /**
407  * Get the textual name of the keyword.
408  *
409  * @param type meta type to get a UTF-8 string for
410  * @return NULL if the type is not known, otherwise
411  *         an English (locale: C) string describing the type;
412  *         translate using `dgettext ("libextractor", rval)`
413  * @ingroup types
414  */
415 _EXTRACTOR_EXTERN const char *
416 EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type);
417 
418 
419 /**
420  * Get a long description for the meta type.
421  *
422  * @param type meta type to get a UTF-8 description for
423  * @return NULL if the type is not known, otherwise
424  *         an English (locale: C) string describing the type;
425  *         translate using `dgettext ("libextractor", rval)`
426  * @ingroup types
427  */
428 _EXTRACTOR_EXTERN const char *
429 EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type);
430 
431 
432 /**
433  * Return the highest type number, exclusive as in [0,max).
434  *
435  * @return highest legal metatype number for this version of libextractor
436  * @ingroup types
437  */
438 _EXTRACTOR_EXTERN enum EXTRACTOR_MetaType
439 EXTRACTOR_metatype_get_max (void);
440 
441 
/**
 * Type of a function that libextractor calls for each
 * meta data item found.
 *
 * @param cls closure (user-defined)
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (e.g. '&lt;zlib&gt;' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about @a data
 * @param data_mime_type mime-type of @a data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return 0 to continue extracting, 1 to abort
 */
typedef int
(*EXTRACTOR_MetaDataProcessor) (void *cls,
                                const char *plugin_name,
                                enum EXTRACTOR_MetaType type,
                                enum EXTRACTOR_MetaFormat format,
                                const char *data_mime_type,
                                const char *data,
                                size_t data_len);


469 /**
470  * Context provided for plugins that perform meta data extraction.
471  */
472 struct EXTRACTOR_ExtractContext
473 {
474 
475   /**
476    * Closure argument to pass to all callbacks.
477    */
478   void *cls;
479 
480   /**
481    * Configuration string for the plugin.
482    */
483   const char *config;
484 
485   /**
486    * Obtain a pointer to up to @a size bytes of data from the file to process.
487    *
488    * @param cls the @e cls member of this struct
489    * @param data pointer to set to the file data, set to NULL on error
490    * @param size maximum number of bytes requested
491    * @return number of bytes now available in @a data (can be smaller than @a size),
492    *         -1 on error
493    */
494   ssize_t (*read) (void *cls,
495                    void **data,
496                    size_t size);
497 
498 
499   /**
500    * Seek in the file.  Use `SEEK_CUR` for @a whence and @a pos of 0 to
501    * obtain the current position in the file.
502    *
503    * @param cls the @e cls member of this struct
504    * @param pos position to seek (see 'man lseek')
505    * @param whence how to see (absolute to start, relative, absolute to end)
506    * @return new absolute position, -1 on error (i.e. desired position
507    *         does not exist)
508    */
509   int64_t (*seek) (void *cls,
510                    int64_t pos,
511                    int whence);
512 
513 
514   /**
515    * Determine the overall size of the file.
516    *
517    * @param cls the @a cls member of this struct
518    * @return overall file size, `UINT64_MAX` on error (i.e. IPC failure)
519    */
520   uint64_t (*get_size) (void *cls);
521 
522   /**
523    * Function to call on extracted data.
524    */
525   EXTRACTOR_MetaDataProcessor proc;
526 
527 };
528 
529 
/**
 * Signature of the extract method that each plugin
 * must provide.
 *
 * @param ec extraction context provided to the plugin
 */
typedef void
(*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec);


/**
 * Linked list of extractor plugins.  An application builds this list
 * by telling libextractor to load various keyword-extraction
 * plugins.  Libraries can also be unloaded (removed from this list,
 * see #EXTRACTOR_plugin_remove).
 *
 * Only declared here, not defined: this header exposes the list
 * solely through pointers.
 */
struct EXTRACTOR_PluginList;


549 /**
550  * Load the default set of plugins.  The default can be changed
551  * by setting the LIBEXTRACTOR_LIBRARIES environment variable;
552  * If it is set to "env", then this function will return
553  * #EXTRACTOR_plugin_add_config (NULL, env, flags).
554  *
555  * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
556  * to locate the installed plugins and load all of them.
557  * The directory where the code will search for plugins is typically
558  * automatically determined; it can be specified explicitly using the
559  * "LIBEXTRACTOR_PREFIX" environment variable.
560  *
561  * This environment variable must be set to the precise directory with
562  * the plugins (i.e. "/usr/lib/libextractor", not "/usr").  Note that
563  * setting the environment variable will disable all of the methods
564  * that are typically used to determine the location of plugins.
565  * Multiple paths can be specified using ':' to separate them.
566  *
567  * @param flags options for all of the plugins loaded
568  * @return the default set of plugins, NULL if no plugins were found
569  */
570 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
571 EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags);
572 
573 
574 /**
575  * Add a library for keyword extraction.
576  *
577  * @param prev the previous list of libraries, may be NULL
578  * @param library the name of the library (short handle, i.e. "mime")
579  * @param options options to give to the library
580  * @param flags options to use
581  * @return the new list of libraries, equal to prev iff an error occured
582  */
583 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
584 EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList *prev,
585                       const char *library,
586                       const char *options,
587                       enum EXTRACTOR_Options flags);
588 
589 
590 /**
591  * Load multiple libraries as specified by the user.
592  *
593  * @param config a string given by the user that defines which
594  *        libraries should be loaded. Has the format
595  *        "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
596  *        For example, 'mp3:ogg' loads the
597  *        mp3 and the ogg plugins. The '-' before the LIBRARYNAME
598  *        indicates that the library should be removed from
599  *        the library list.
600  * @param prev the  previous list of libraries, may be NULL
601  * @param flags options to use
602  * @return the new list of libraries, equal to prev iff an error occured
603  *         or if config was empty (or NULL).
604  */
605 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
606 EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev,
607                              const char *config,
608                              enum EXTRACTOR_Options flags);
609 
610 
611 /**
612  * Remove a plugin from a list.
613  *
614  * @param prev the current list of plugins
615  * @param library the name of the plugin to remove (short handle)
616  * @return the reduced list, unchanged if the plugin was not loaded
617  */
618 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
619 EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev,
620                          const char *library);
621 
622 
623 /**
624  * Remove all plugins from the given list (destroys the list).
625  *
626  * @param plugin the list of plugins
627  */
628 _EXTRACTOR_EXTERN void
629 EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins);
630 
631 
632 /**
633  * Extract keywords from a file using the given set of plugins.
634  *
635  * @param plugins the list of plugins to use
636  * @param filename the name of the file, can be NULL if @a data is not NULL
637  * @param data data of the file in memory, can be NULL (in which
638  *        case libextractor will open file) if filename is not NULL
639  * @param size number of bytes in @a data, ignored if @a data is NULL
640  * @param proc function to call for each meta data item found
641  * @param proc_cls cls argument to @a proc
642  */
643 _EXTRACTOR_EXTERN void
644 EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
645                    const char *filename,
646                    const void *data,
647                    size_t size,
648                    EXTRACTOR_MetaDataProcessor proc,
649                    void *proc_cls);
650 
651 
652 /**
653  * Simple #EXTRACTOR_MetaDataProcessor implementation that simply
654  * prints the extracted meta data to the given file.  Only prints
655  * those keywords that are in UTF-8 format.
656  *
657  * @param handle the file to write to (`stdout`, `stderr`), must NOT be NULL,
658  *               must be of type `FILE *`.
659  * @param plugin_name name of the plugin that produced this value
660  * @param type libextractor-type describing the meta data
661  * @param format basic format information about data
662  * @param data_mime_type mime-type of @a data (not of the original file);
663  *        can be NULL (if mime-type is not known)
664  * @param data actual meta-data found
665  * @param data_len number of bytes in @a data
666  * @return non-zero if printing failed, otherwise 0.
667  */
668 _EXTRACTOR_EXTERN int
669 EXTRACTOR_meta_data_print (void *handle,
670                            const char *plugin_name,
671                            enum EXTRACTOR_MetaType type,
672                            enum EXTRACTOR_MetaFormat format,
673                            const char *data_mime_type,
674                            const char *data,
675                            size_t data_len);
676 
677 
678 #if 0 /* keep Emacsens' auto-indent happy */
679 {
680 #endif
681 #ifdef __cplusplus
682 }
683 #endif
684 
685 #endif
686