/*
     This file is part of libextractor.
     Copyright (C) 2002-2017 Vidyut Samanta and Christian Grothoff

     libextractor is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 3, or (at your
     option) any later version.

     libextractor is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with libextractor; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     Boston, MA 02110-1301, USA.
 */

/*
 * Public declarations for libextractor: the meta data types and
 * formats, the context handed to extraction plugins, and the
 * functions for loading plugins and running an extraction.
 */

#ifndef EXTRACTOR_H
#define EXTRACTOR_H

#ifdef __cplusplus
extern "C" {
#if 0 /* keep Emacsens' auto-indent happy */
}
#endif
#endif


#include <stdint.h>

/**
 * 0.2.6-1 => 0x00020601
 * 4.5.2-0 => 0x04050200
 */
#define EXTRACTOR_VERSION 0x010B0000

#include <stdio.h>
/* ssize_t (return type of EXTRACTOR_ExtractContext::read) is a POSIX
   type declared by <sys/types.h>; <stdio.h> is not guaranteed to
   provide it when compiling in a strict ISO C mode. */
#include <sys/types.h>

#ifndef _EXTRACTOR_EXTERN
/* NOTE(review): the MHD_W32LIB / MHD_W32DLL switches below look like they
   were inherited from another project ("MHD"); the names are kept as-is
   because existing builds may define them -- confirm before renaming. */
#if defined(_WIN32) && defined(MHD_W32LIB)
#define _EXTRACTOR_EXTERN extern
#elif defined (_WIN32) && defined(MHD_W32DLL)
/* Define MHD_W32DLL when using MHD as W32 .DLL to speed up linker a little */
#define _EXTRACTOR_EXTERN __declspec(dllimport)
#else
#define _EXTRACTOR_EXTERN extern
#endif
#endif

/**
 * Options for how plugin execution should be done.
 */
enum EXTRACTOR_Options
{

  /**
   * Run plugin out-of-process, starting the process once the plugin
   * is to be run.  If a plugin crashes, automatically restart the
   * respective process for the same file and try once more
   * (since the crash may be caused by the previous file).  If
   * the process crashes immediately again, it is not restarted
   * until the next file.
   */
  EXTRACTOR_OPTION_DEFAULT_POLICY = 0,

  /**
   * Deprecated option.  Ignored.
   */
  EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1,

  /**
   * Run plugins in-process.  Unsafe, not recommended,
   * can be nice for debugging.
   */
  EXTRACTOR_OPTION_IN_PROCESS = 2,

  /**
   * Internal value for plugins that have been disabled.
   */
  EXTRACTOR_OPTION_DISABLED = 3

};


/**
 * Format in which the extracted meta data is presented.
 */
enum EXTRACTOR_MetaFormat
{
  /**
   * Format is unknown.
   */
  EXTRACTOR_METAFORMAT_UNKNOWN = 0,

  /**
   * 0-terminated, UTF-8 encoded string.  "data_len"
   * is strlen(data)+1.
   */
  EXTRACTOR_METAFORMAT_UTF8 = 1,

  /**
   * Some kind of binary format, see given Mime type.
   */
  EXTRACTOR_METAFORMAT_BINARY = 2,

  /**
   * 0-terminated string.  The specific encoding is unknown.
   * "data_len" is strlen (data)+1.
   */
  EXTRACTOR_METAFORMAT_C_STRING = 3

};


/**
 * Enumeration defining various sources of keywords.  See also
 * http://dublincore.org/documents/1998/09/dces/
 *
 * The numeric values are spelled out explicitly for every entry;
 * #EXTRACTOR_METATYPE_LAST is one past the highest assigned value.
 *
 * @defgroup types meta data types
 * @{
 */
enum EXTRACTOR_MetaType
{
  /* fundamental types */
  EXTRACTOR_METATYPE_RESERVED = 0,
  EXTRACTOR_METATYPE_MIMETYPE = 1,
  EXTRACTOR_METATYPE_FILENAME = 2,
  EXTRACTOR_METATYPE_COMMENT = 3,

  /* Standard types from bibtex */
  EXTRACTOR_METATYPE_TITLE = 4,
  EXTRACTOR_METATYPE_BOOK_TITLE = 5,
  EXTRACTOR_METATYPE_BOOK_EDITION = 6,
  EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER = 7,
  EXTRACTOR_METATYPE_JOURNAL_NAME = 8,
  EXTRACTOR_METATYPE_JOURNAL_VOLUME = 9,
  EXTRACTOR_METATYPE_JOURNAL_NUMBER = 10,
  EXTRACTOR_METATYPE_PAGE_COUNT = 11,
  EXTRACTOR_METATYPE_PAGE_RANGE = 12,
  EXTRACTOR_METATYPE_AUTHOR_NAME = 13,
  EXTRACTOR_METATYPE_AUTHOR_EMAIL = 14,
  EXTRACTOR_METATYPE_AUTHOR_INSTITUTION = 15,
  EXTRACTOR_METATYPE_PUBLISHER = 16,
  EXTRACTOR_METATYPE_PUBLISHER_ADDRESS = 17,
  EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION = 18,
  EXTRACTOR_METATYPE_PUBLISHER_SERIES = 19,
  EXTRACTOR_METATYPE_PUBLICATION_TYPE = 20,
  EXTRACTOR_METATYPE_PUBLICATION_YEAR = 21,
  EXTRACTOR_METATYPE_PUBLICATION_MONTH = 22,
  EXTRACTOR_METATYPE_PUBLICATION_DAY = 23,
  EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
  EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
  EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
  EXTRACTOR_METATYPE_LANGUAGE = 27,
  EXTRACTOR_METATYPE_CREATION_TIME = 28,
  EXTRACTOR_METATYPE_URL = 29,

  /* "unique" document identifiers */
  EXTRACTOR_METATYPE_URI = 30,
  EXTRACTOR_METATYPE_ISRC = 31,
  EXTRACTOR_METATYPE_HASH_MD4 = 32,
  EXTRACTOR_METATYPE_HASH_MD5 = 33,
  EXTRACTOR_METATYPE_HASH_SHA0 = 34,
  EXTRACTOR_METATYPE_HASH_SHA1 = 35,
  EXTRACTOR_METATYPE_HASH_RMD160 = 36,

  /* identifiers of a location */
  EXTRACTOR_METATYPE_GPS_LATITUDE_REF = 37,
  EXTRACTOR_METATYPE_GPS_LATITUDE = 38,
  EXTRACTOR_METATYPE_GPS_LONGITUDE_REF = 39,
  EXTRACTOR_METATYPE_GPS_LONGITUDE = 40,
  EXTRACTOR_METATYPE_LOCATION_CITY = 41,
  EXTRACTOR_METATYPE_LOCATION_SUBLOCATION = 42,
  EXTRACTOR_METATYPE_LOCATION_COUNTRY = 43,
  EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE = 44,

  /* generic attributes */
  EXTRACTOR_METATYPE_UNKNOWN = 45,
  EXTRACTOR_METATYPE_DESCRIPTION = 46,
  EXTRACTOR_METATYPE_COPYRIGHT = 47,
  EXTRACTOR_METATYPE_RIGHTS = 48,
  EXTRACTOR_METATYPE_KEYWORDS = 49,
  EXTRACTOR_METATYPE_ABSTRACT = 50,
  EXTRACTOR_METATYPE_SUMMARY = 51,
  EXTRACTOR_METATYPE_SUBJECT = 52,
  EXTRACTOR_METATYPE_CREATOR = 53,
  EXTRACTOR_METATYPE_FORMAT = 54,
  EXTRACTOR_METATYPE_FORMAT_VERSION = 55,

  /* processing history */
  EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE = 56,
  EXTRACTOR_METATYPE_UNKNOWN_DATE = 57,
  EXTRACTOR_METATYPE_CREATION_DATE = 58,
  EXTRACTOR_METATYPE_MODIFICATION_DATE = 59,
  EXTRACTOR_METATYPE_LAST_PRINTED = 60,
  EXTRACTOR_METATYPE_LAST_SAVED_BY = 61,
  EXTRACTOR_METATYPE_TOTAL_EDITING_TIME = 62,
  EXTRACTOR_METATYPE_EDITING_CYCLES = 63,
  EXTRACTOR_METATYPE_MODIFIED_BY_SOFTWARE = 64,
  EXTRACTOR_METATYPE_REVISION_HISTORY = 65,

  EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE = 66,
  EXTRACTOR_METATYPE_FINDER_FILE_TYPE = 67,
  EXTRACTOR_METATYPE_FINDER_FILE_CREATOR = 68,

  /* software package specifics (deb, rpm, tgz, elf) */
  EXTRACTOR_METATYPE_PACKAGE_NAME = 69,
  EXTRACTOR_METATYPE_PACKAGE_VERSION = 70,
  EXTRACTOR_METATYPE_SECTION = 71,
  EXTRACTOR_METATYPE_UPLOAD_PRIORITY = 72,
  EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY = 73,
  EXTRACTOR_METATYPE_PACKAGE_CONFLICTS = 74,
  EXTRACTOR_METATYPE_PACKAGE_REPLACES = 75,
  EXTRACTOR_METATYPE_PACKAGE_PROVIDES = 76,
  EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS = 77,
  EXTRACTOR_METATYPE_PACKAGE_SUGGESTS = 78,
  EXTRACTOR_METATYPE_PACKAGE_MAINTAINER = 79,
  EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE = 80,
  EXTRACTOR_METATYPE_PACKAGE_SOURCE = 81,
  EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL = 82,
  EXTRACTOR_METATYPE_TARGET_ARCHITECTURE = 83,
  EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY = 84,
  EXTRACTOR_METATYPE_LICENSE = 85,
  EXTRACTOR_METATYPE_PACKAGE_DISTRIBUTION = 86,
  EXTRACTOR_METATYPE_BUILDHOST = 87,
  EXTRACTOR_METATYPE_VENDOR = 88,
  EXTRACTOR_METATYPE_TARGET_OS = 89,
  EXTRACTOR_METATYPE_SOFTWARE_VERSION = 90,
  EXTRACTOR_METATYPE_TARGET_PLATFORM = 91,
  EXTRACTOR_METATYPE_RESOURCE_TYPE = 92,
  EXTRACTOR_METATYPE_LIBRARY_SEARCH_PATH = 93,
  EXTRACTOR_METATYPE_LIBRARY_DEPENDENCY = 94,

  /* photography specifics */
  EXTRACTOR_METATYPE_CAMERA_MAKE = 95,
  EXTRACTOR_METATYPE_CAMERA_MODEL = 96,
  EXTRACTOR_METATYPE_EXPOSURE = 97,
  EXTRACTOR_METATYPE_APERTURE = 98,
  EXTRACTOR_METATYPE_EXPOSURE_BIAS = 99,
  EXTRACTOR_METATYPE_FLASH = 100,
  EXTRACTOR_METATYPE_FLASH_BIAS = 101,
  EXTRACTOR_METATYPE_FOCAL_LENGTH = 102,
  EXTRACTOR_METATYPE_FOCAL_LENGTH_35MM = 103,
  EXTRACTOR_METATYPE_ISO_SPEED = 104,
  EXTRACTOR_METATYPE_EXPOSURE_MODE = 105,
  EXTRACTOR_METATYPE_METERING_MODE = 106,
  EXTRACTOR_METATYPE_MACRO_MODE = 107,
  EXTRACTOR_METATYPE_IMAGE_QUALITY = 108,
  EXTRACTOR_METATYPE_WHITE_BALANCE = 109,
  EXTRACTOR_METATYPE_ORIENTATION = 110,
  EXTRACTOR_METATYPE_MAGNIFICATION = 111,

  /* image specifics */
  EXTRACTOR_METATYPE_IMAGE_DIMENSIONS = 112,
  EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE = 113,
  EXTRACTOR_METATYPE_THUMBNAIL = 114,
  EXTRACTOR_METATYPE_IMAGE_RESOLUTION = 115,
  EXTRACTOR_METATYPE_SOURCE = 116,

  /* (text) document processing specifics */
  EXTRACTOR_METATYPE_CHARACTER_SET = 117,
  EXTRACTOR_METATYPE_LINE_COUNT = 118,
  EXTRACTOR_METATYPE_PARAGRAPH_COUNT = 119,
  EXTRACTOR_METATYPE_WORD_COUNT = 120,
  EXTRACTOR_METATYPE_CHARACTER_COUNT = 121,
  EXTRACTOR_METATYPE_PAGE_ORIENTATION = 122,
  EXTRACTOR_METATYPE_PAPER_SIZE = 123,
  EXTRACTOR_METATYPE_TEMPLATE = 124,
  EXTRACTOR_METATYPE_COMPANY = 125,
  EXTRACTOR_METATYPE_MANAGER = 126,
  EXTRACTOR_METATYPE_REVISION_NUMBER = 127,

  /* music / video specifics */
  EXTRACTOR_METATYPE_DURATION = 128,
  EXTRACTOR_METATYPE_ALBUM = 129,
  EXTRACTOR_METATYPE_ARTIST = 130,
  EXTRACTOR_METATYPE_GENRE = 131,
  EXTRACTOR_METATYPE_TRACK_NUMBER = 132,
  EXTRACTOR_METATYPE_DISC_NUMBER = 133,
  EXTRACTOR_METATYPE_PERFORMER = 134,
  EXTRACTOR_METATYPE_CONTACT_INFORMATION = 135,
  EXTRACTOR_METATYPE_SONG_VERSION = 136,
  EXTRACTOR_METATYPE_PICTURE = 137,
  EXTRACTOR_METATYPE_COVER_PICTURE = 138,
  EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE = 139,
  EXTRACTOR_METATYPE_EVENT_PICTURE = 140,
  EXTRACTOR_METATYPE_LOGO = 141,
  EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM = 142,
  EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
  EXTRACTOR_METATYPE_DISCLAIMER = 144,
  EXTRACTOR_METATYPE_WARNING = 145,
  EXTRACTOR_METATYPE_PAGE_ORDER = 146,
  EXTRACTOR_METATYPE_WRITER = 147,
  EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
  EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
  EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
  EXTRACTOR_METATYPE_NETWORK_NAME = 151,
  EXTRACTOR_METATYPE_SHOW_NAME = 152,
  EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
  EXTRACTOR_METATYPE_SONG_COUNT = 154,
  EXTRACTOR_METATYPE_STARTING_SONG = 155,
  EXTRACTOR_METATYPE_PLAY_COUNTER = 156,
  EXTRACTOR_METATYPE_CONDUCTOR = 157,
  EXTRACTOR_METATYPE_INTERPRETATION = 158,
  EXTRACTOR_METATYPE_COMPOSER = 159,
  EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160,
  EXTRACTOR_METATYPE_ENCODED_BY = 161,
  EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162,
  EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
  EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
  EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
  EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
  EXTRACTOR_METATYPE_LYRICS = 167,
  EXTRACTOR_METATYPE_POPULARITY_METER = 168,
  EXTRACTOR_METATYPE_LICENSEE = 169,
  EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
  EXTRACTOR_METATYPE_MOOD = 171,
  EXTRACTOR_METATYPE_SUBTITLE = 172,

  /* GNUnet specific values (never extracted) */
  EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173,
  EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174,
  EXTRACTOR_METATYPE_RATING = 175,
  EXTRACTOR_METATYPE_ORGANIZATION = 176,
  EXTRACTOR_METATYPE_RIPPER = 177,
  EXTRACTOR_METATYPE_PRODUCER = 178,
  EXTRACTOR_METATYPE_GROUP = 179,
  EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME = 180,

  EXTRACTOR_METATYPE_DISC_COUNT = 181,

  EXTRACTOR_METATYPE_CODEC = 182,
  EXTRACTOR_METATYPE_VIDEO_CODEC = 183,
  EXTRACTOR_METATYPE_AUDIO_CODEC = 184,
  EXTRACTOR_METATYPE_SUBTITLE_CODEC = 185,

  EXTRACTOR_METATYPE_CONTAINER_FORMAT = 186,

  EXTRACTOR_METATYPE_BITRATE = 187,
  EXTRACTOR_METATYPE_NOMINAL_BITRATE = 188,
  EXTRACTOR_METATYPE_MINIMUM_BITRATE = 189,
  EXTRACTOR_METATYPE_MAXIMUM_BITRATE = 190,

  EXTRACTOR_METATYPE_SERIAL = 191,

  EXTRACTOR_METATYPE_ENCODER = 192,
  EXTRACTOR_METATYPE_ENCODER_VERSION = 193,

  EXTRACTOR_METATYPE_TRACK_GAIN = 194,
  EXTRACTOR_METATYPE_TRACK_PEAK = 195,
  EXTRACTOR_METATYPE_ALBUM_GAIN = 196,
  EXTRACTOR_METATYPE_ALBUM_PEAK = 197,
  EXTRACTOR_METATYPE_REFERENCE_LEVEL = 198,

  EXTRACTOR_METATYPE_LOCATION_NAME = 199,
  EXTRACTOR_METATYPE_LOCATION_ELEVATION = 200,
  EXTRACTOR_METATYPE_LOCATION_HORIZONTAL_ERROR = 201,
  EXTRACTOR_METATYPE_LOCATION_MOVEMENT_SPEED = 202,
  EXTRACTOR_METATYPE_LOCATION_MOVEMENT_DIRECTION = 203,
  EXTRACTOR_METATYPE_LOCATION_CAPTURE_DIRECTION = 204,

  EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER = 205,
  EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER = 206,

  EXTRACTOR_METATYPE_GROUPING = 207,

  EXTRACTOR_METATYPE_DEVICE_MANUFACTURER = 208,
  EXTRACTOR_METATYPE_DEVICE_MODEL = 209,

  EXTRACTOR_METATYPE_AUDIO_LANGUAGE = 210,
  EXTRACTOR_METATYPE_CHANNELS = 211,
  EXTRACTOR_METATYPE_SAMPLE_RATE = 212,
  EXTRACTOR_METATYPE_AUDIO_DEPTH = 213,
  EXTRACTOR_METATYPE_AUDIO_BITRATE = 214,
  EXTRACTOR_METATYPE_MAXIMUM_AUDIO_BITRATE = 215,

  EXTRACTOR_METATYPE_VIDEO_DIMENSIONS = 216,
  EXTRACTOR_METATYPE_VIDEO_DEPTH = 217,
  EXTRACTOR_METATYPE_FRAME_RATE = 218,
  EXTRACTOR_METATYPE_PIXEL_ASPECT_RATIO = 219,
  EXTRACTOR_METATYPE_VIDEO_BITRATE = 220,
  EXTRACTOR_METATYPE_MAXIMUM_VIDEO_BITRATE = 221,

  EXTRACTOR_METATYPE_SUBTITLE_LANGUAGE = 222,
  EXTRACTOR_METATYPE_VIDEO_LANGUAGE = 223,

  EXTRACTOR_METATYPE_TOC = 224,

  EXTRACTOR_METATYPE_VIDEO_DURATION = 225,
  EXTRACTOR_METATYPE_AUDIO_DURATION = 226,
  EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227,

  EXTRACTOR_METATYPE_AUDIO_PREVIEW = 228,

  EXTRACTOR_METATYPE_NARINFO = 229,
  EXTRACTOR_METATYPE_NAR = 230,

  EXTRACTOR_METATYPE_LAST = 231
};

/** @} */ /* end of meta data types */

/**
 * Get the textual name of the keyword.
 *
 * @param type meta type to get a UTF-8 string for
 * @return NULL if the type is not known, otherwise
 *         an English (locale: C) string describing the type;
 *         translate using `dgettext ("libextractor", rval)`
 * @ingroup types
 */
_EXTRACTOR_EXTERN const char *
EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type);


/**
 * Get a long description for the meta type.
 *
 * @param type meta type to get a UTF-8 description for
 * @return NULL if the type is not known, otherwise
 *         an English (locale: C) string describing the type;
 *         translate using `dgettext ("libextractor", rval)`
 * @ingroup types
 */
_EXTRACTOR_EXTERN const char *
EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type);


/**
 * Return the highest type number, exclusive as in [0,max).
 *
 * @return exclusive upper bound on the legal metatype numbers
 *         for this version of libextractor
 * @ingroup types
 */
_EXTRACTOR_EXTERN enum EXTRACTOR_MetaType
EXTRACTOR_metatype_get_max (void);


/**
 * Type of a function that libextractor calls for each
 * meta data item found.
 *
 * @param cls closure (user-defined)
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (i.e. '&lt;zlib&gt;' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about @a data
 * @param data_mime_type mime-type of @a data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return 0 to continue extracting, 1 to abort
 */
typedef int
(*EXTRACTOR_MetaDataProcessor) (void *cls,
                                const char *plugin_name,
                                enum EXTRACTOR_MetaType type,
                                enum EXTRACTOR_MetaFormat format,
                                const char *data_mime_type,
                                const char *data,
                                size_t data_len);


/**
 * Context provided for plugins that perform meta data extraction.
 */
struct EXTRACTOR_ExtractContext
{

  /**
   * Closure argument to pass to all callbacks.
   */
  void *cls;

  /**
   * Configuration string for the plugin.
   */
  const char *config;

  /**
   * Obtain a pointer to up to @a size bytes of data from the file to process.
   *
   * @param cls the @e cls member of this struct
   * @param data pointer to set to the file data, set to NULL on error
   * @param size maximum number of bytes requested
   * @return number of bytes now available in @a data (can be smaller than @a size),
   *         -1 on error
   */
  ssize_t (*read) (void *cls,
                   void **data,
                   size_t size);


  /**
   * Seek in the file.  Use `SEEK_CUR` for @a whence and @a pos of 0 to
   * obtain the current position in the file.
   *
   * @param cls the @e cls member of this struct
   * @param pos position to seek (see 'man lseek')
   * @param whence how to seek (absolute to start, relative, absolute to end)
   * @return new absolute position, -1 on error (i.e. desired position
   *         does not exist)
   */
  int64_t (*seek) (void *cls,
                   int64_t pos,
                   int whence);


  /**
   * Determine the overall size of the file.
   *
   * @param cls the @e cls member of this struct
   * @return overall file size, `UINT64_MAX` on error (i.e. IPC failure)
   */
  uint64_t (*get_size) (void *cls);

  /**
   * Function to call on extracted data.
   */
  EXTRACTOR_MetaDataProcessor proc;

};


/**
 * Signature of the extract method that each plugin
 * must provide.
 *
 * @param ec extraction context provided to the plugin
 */
typedef void
(*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec);


/**
 * Linked list of extractor plugins.  An application builds this list
 * by telling libextractor to load various keyword-extraction
 * plugins.  Libraries can also be unloaded (removed from this list,
 * see #EXTRACTOR_plugin_remove).
 */
struct EXTRACTOR_PluginList;


/**
 * Load the default set of plugins.  The default can be changed
 * by setting the LIBEXTRACTOR_LIBRARIES environment variable;
 * If it is set to "env", then this function will return
 * #EXTRACTOR_plugin_add_config (NULL, env, flags).
 *
 * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
 * to locate the installed plugins and load all of them.
 * The directory where the code will search for plugins is typically
 * automatically determined; it can be specified explicitly using the
 * "LIBEXTRACTOR_PREFIX" environment variable.
 *
 * This environment variable must be set to the precise directory with
 * the plugins (i.e. "/usr/lib/libextractor", not "/usr").  Note that
 * setting the environment variable will disable all of the methods
 * that are typically used to determine the location of plugins.
 * Multiple paths can be specified using ':' to separate them.
 *
 * @param flags options for all of the plugins loaded
 * @return the default set of plugins, NULL if no plugins were found
 */
_EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags);


/**
 * Add a library for keyword extraction.
 *
 * @param prev the previous list of libraries, may be NULL
 * @param library the name of the library (short handle, i.e. "mime")
 * @param options options to give to the library
 * @param flags options to use
 * @return the new list of libraries, equal to prev iff an error occurred
 */
_EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList *prev,
                      const char *library,
                      const char *options,
                      enum EXTRACTOR_Options flags);


/**
 * Load multiple libraries as specified by the user.
 *
 * @param config a string given by the user that defines which
 *        libraries should be loaded.  Has the format
 *        "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
 *        For example, 'mp3:ogg' loads the
 *        mp3 and the ogg plugins.  The '-' before the LIBRARYNAME
 *        indicates that the library should be removed from
 *        the library list.
 * @param prev the previous list of libraries, may be NULL
 * @param flags options to use
 * @return the new list of libraries, equal to prev iff an error occurred
 *         or if config was empty (or NULL).
 */
_EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev,
                             const char *config,
                             enum EXTRACTOR_Options flags);


/**
 * Remove a plugin from a list.
 *
 * @param prev the current list of plugins
 * @param library the name of the plugin to remove (short handle)
 * @return the reduced list, unchanged if the plugin was not loaded
 */
_EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev,
                         const char *library);


/**
 * Remove all plugins from the given list (destroys the list).
 *
 * @param plugins the list of plugins
 */
_EXTRACTOR_EXTERN void
EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins);


/**
 * Extract keywords from a file using the given set of plugins.
 *
 * @param plugins the list of plugins to use
 * @param filename the name of the file, can be NULL if @a data is not NULL
 * @param data data of the file in memory, can be NULL (in which
 *        case libextractor will open file) if filename is not NULL
 * @param size number of bytes in @a data, ignored if @a data is NULL
 * @param proc function to call for each meta data item found
 * @param proc_cls cls argument to @a proc
 */
_EXTRACTOR_EXTERN void
EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
                   const char *filename,
                   const void *data,
                   size_t size,
                   EXTRACTOR_MetaDataProcessor proc,
                   void *proc_cls);


/**
 * Simple #EXTRACTOR_MetaDataProcessor implementation that simply
 * prints the extracted meta data to the given file.  Only prints
 * those keywords that are in UTF-8 format.
 *
 * @param handle the file to write to (`stdout`, `stderr`), must NOT be NULL,
 *        must be of type `FILE *`.
 * @param plugin_name name of the plugin that produced this value
 * @param type libextractor-type describing the meta data
 * @param format basic format information about @a data
 * @param data_mime_type mime-type of @a data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return non-zero if printing failed, otherwise 0.
 */
_EXTRACTOR_EXTERN int
EXTRACTOR_meta_data_print (void *handle,
                           const char *plugin_name,
                           enum EXTRACTOR_MetaType type,
                           enum EXTRACTOR_MetaFormat format,
                           const char *data_mime_type,
                           const char *data,
                           size_t data_len);


#if 0 /* keep Emacsens' auto-indent happy */
{
#endif
#ifdef __cplusplus
}
#endif

#endif