1 /*************************************************************** 2 Copyright (C) 2006-2014 Hewlett-Packard Development Company, L.P. 3 Copyright (C) 2014, Siemens AG 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License 7 version 2 as published by the Free Software Foundation. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along 15 with this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 18 ***************************************************************/ 19 20 /** 21 * \dir 22 * \brief Nomos agent source 23 * \file 24 * \brief Nomos header file 25 * \page nomos Nomos agent 26 * \tableofcontents 27 * \section nomosabout About 28 * Nomos does license identification using short phrases (regular expressions) 29 * and heuristics (e.g. phrase must be found in (or out of) proximity to 30 * another phrase or phrases). 31 * 32 * Nomos may also identify a "style" type of license if it has similarities 33 * with a known license type. 34 * 35 * The signatures which uniquely identify a license are stored in `STRINGS.in` 36 * file. 37 * 38 * If you wish to contribute to Nomos, please read [How to add a new license 39 * signature](https://github.com/fossology/fossology/wiki/Nomos). 40 * 41 * \subsection nomosdebug Debugging 42 * Activating the defines of PROC_TRACE and/or DOCTOR_DEBUG (see line 31 of 43 * file parse.c). Nomos generates lot of tracing information that is really 44 * helpful to debug it. 45 * 46 * `PROC_TRACE` will show you, for example, which regex's were tried and which 47 * are successful. To see the successful matches, grep the output file for 48 * "addRef". 49 * 50 * `DOCTOR_DEBUG` will show you the before and after versions of the buffer to 51 * be processed. Look in the output file or `----- [Dr-BEFORE:] -----` and 52 * `+++++ [Dr-AFTER] +++:` 53 * 54 * \section nomosactions Supported actions 55 * 56 * | Command line flag | Description | 57 * | ---: | :--- | 58 * | -i | Initialize the database, then exit. | 59 * | -c | Specify the directory for the system configuration. | 60 * | -l | Print full file path (command line only). | 61 * | -v | Verbose (-vv = more verbose). | 62 * | -J | Output in JSON. | 63 * | -S | Print Highlightinfo to stdout . | 64 * | file | If files are listed, print the licenses detected within them. | 65 * | no file | Process data from the scheduler. | 66 * | -V | Print the version info, then exit. | 67 * | -d | Specify a directory to scan. | 68 * | -n | Spaw n - 1 child processes to run, there will be n running 69 * processes(the parent and n - 1 children). \n The default n is 2(when n is 70 * less than 2 or not setting, will be changed to 2) when -d is specified. | 71 * \section nomossource Agent source 72 * - \link src/nomos/agent \endlink 73 * - \link src/nomos/ui \endlink 74 * - Functional test cases \link src/nomos/agent_tests/Functional \endlink 75 * - Unit test cases \link src/nomos/agent_tests/Unit \endlink 76 */ 77 78 #ifndef _NOMOS_H 79 #define _NOMOS_H 1 80 #ifndef _GNU_SOURCE 81 #define _GNU_SOURCE 82 #endif /* not defined _GNU_SOURCE */ 83 #include <glib.h> 84 #include <stdio.h> 85 #include <assert.h> 86 #include <stdlib.h> 87 #include <search.h> 88 #include <unistd.h> 89 #include <fcntl.h> 90 #include <string.h> 91 #include <strings.h> 92 #include <ctype.h> 93 #include <dirent.h> 94 #include <ftw.h> 95 #include <regex.h> 96 #include <getopt.h> 97 #include <time.h> 98 #include <libgen.h> 99 #include <errno.h> 100 #include <sys/types.h> 101 #include <sys/stat.h> 102 #include <sys/wait.h> 103 #include <sys/mman.h> 104 #include <sys/time.h> 105 #include "nomos_gap.h" 106 #include <stdbool.h> 107 #include <semaphore.h> 108 #include <stdbool.h> 109 #include "json_writer.h" 110 111 /** Use nomos in standalone mode (no FOSSology DB) */ 112 #ifdef STANDALONE 113 #include "standalone.h" 114 #else 115 #include <libfossology.h> 116 #include <libfossdbmanager.h> 117 #endif 118 119 /** 120 * To use our local version of debug-malloc(), compile -DMEMORY_TRACING 121 */ 122 #ifdef MEMORY_TRACING 123 #include "DMalloc.h" 124 #endif /* MEMORY_TRACING */ 125 126 #define PRECHECK 127 #define GPLV2_BEATS_GPLV3 128 #define SAVE_UNCLASSIFIED_LICENSES 129 /*#define FLAG_NO_COPYRIGHT*/ 130 131 #ifdef PROC_TRACE_SWITCH 132 #define PROC_TRACE 133 #endif /* PROC_TRACE_SWITCH */ 134 135 #define myBUFSIZ 4096 ///< Buffer max length 136 #define MAX_RENAME 1000 ///< Max rename length 137 #define TEMP_FILE_LEN 100 ///< Max temp file length 138 139 /** MAX_SCANBYTES is the maximum number of bytes that will be scanned 140 * in a file. Historically, we have never found a license more than 141 * 64k into a file. 142 */ 143 #define MAX_SCANBYTES 1024*1024 144 145 /** 146 * Program options and flags 147 * 148 * MD: I think these are used when making nomos 149 */ 150 #define OPTS_DEBUG 0x1 151 #define OPTS_TRACE_SWITCH 0x2 152 #define OPTS_LONG_CMD_OUTPUT 0x4 153 #define OPTS_HIGHLIGHT_STDOUT 0x8 154 #define OPTS_NO_HIGHLIGHTINFO 0x10 155 #define OPTS_JSON_OUTPUT 0x20 156 #define OPTS_SCANNING_DIRECTORY 0x40 157 158 extern char debugStr[myBUFSIZ]; ///< Debug string 159 extern char dbErrString[myBUFSIZ]; ///< DB error string 160 extern char saveLics[myBUFSIZ]; ///< License string 161 162 extern size_t hashEntries; ///< Hash entries 163 164 /** 165 Flags for program control 166 */ 167 #define FL_SAVEBASE 0x20 168 #define FL_FRAGMENT 0x40 169 #define FL_SHOWMATCH 0x80 170 #define FL_NOCOPYRIGHT 0x100 171 172 /** 173 * Names of various files/dirs created/used 174 */ 175 #define FILE_FOUND "Found.txt" 176 #define FILE_SCORES "_scores" 177 #define DEBUGLOG "/tmp/NomosDebugLog" 178 179 180 /** 181 * Symbolic Boolean value for NO 182 */ 183 #define NO 0 184 /** 185 * Symbolic Boolean value for YES 186 */ 187 #define YES 1 188 189 /* List-sorting flags */ 190 #define UNSORTED 0 191 #define SORT_BY_NAME 1 192 #define SORT_BY_NAME_ICASE 2 193 #define SORT_BY_COUNT_DSC 3 194 #define SORT_BY_COUNT_ASC 4 195 #define SORT_BY_ALIAS 5 196 #define SORT_BY_BASENAME 6 197 198 /* Interest level (licenses) */ 199 #define IL_HIGH 3 200 #define IL_MED 2 201 #define IL_LOW 1 202 #define IL_NONE 0 203 #define IL_INIT -1 204 205 /** 206 * license-text search results (ltsr) stuff 207 */ 208 #define LTSR_RMASK ((char) 1) /**< True if it's been matched */ 209 #define LTSR_SMASK ((char) 2) /**< True if it's been searched for */ 210 #define LTSR_YES ((char) 3) /**< Both searched, and matched */ 211 #define LTSR_NO LTSR_SMASK /**< Searched but not matched */ 212 213 /** 214 * Miscellaneous strings used in various modules 215 */ 216 #define STR_NOTPKG "None (not an rpm-format package)" 217 218 /* 219 * License-scanning limits 220 */ 221 #define _scCOMFORT 9 /**< >= 9 --> certain it's a license */ 222 #define _scINVALID 4 /**< < 4 --> probably NOT a license */ 223 224 /** 225 * LS_ = License Summaries/Strings 226 */ 227 #define LS_NONE "None" 228 #define LS_UNLIKELY "LikelyNot" 229 #define LS_NOSUM "No_license_found" 230 #define LS_UNCL "UnclassifiedLicense" 231 #define LS_NOT_PD "NOT-public-domain" 232 #define LS_PD_CLM "Public-domain" 233 #define LS_PD_CPRT "Public-domain(C)" 234 #define LS_PD_ONLY "Public-domain-ref" 235 #define LS_CPRTONLY "Misc-Copyright" 236 #define LS_TDMKONLY "Trademark-ref" 237 #define LS_LICRONLY "License-ref" 238 #define LS_PATRONLY "Patent-ref" 239 240 /* 241 * NULL values 242 */ 243 #define NULL_ITEM (item_t *) NULL ///< NULL item 244 #define NULL_LIST (list_t *) NULL ///< NULL list 245 #define NULL_FH (fh_t *) NULL ///< NULL fh 246 #define NULL_CHAR '\0' ///< NULL character 247 #define NULL_STR (char *) NULL ///< NULL string 248 249 /** 250 * Macros needed across >1 source module 251 */ 252 #define isEOL(x) (((x == '\n') || (x == '\r') || (x == '\v'))) ///< Check if x points to a EOL character 253 #define IS_HUGE(x) (x >= gl.blkUpperLimit) 254 255 256 257 #define NOMOS_TEMP "/tmp/nomos.tempdir" 258 #define NOMOS_TLOCK "/tmp/nomos.tempdir/.lock.tmp," /**< CDB, This goes away. */ 259 260 261 /** 262 * Caches memory-mapped files 263 */ 264 struct mm_cache { 265 int inUse; ///< Cache in use 266 int fd; ///< File descriptor 267 unsigned long size; ///< Size 268 void *mmPtr; ///< Memory pointer 269 char label[myBUFSIZ]; ///< Label 270 }; 271 272 273 /* 274 CDB - This is kind of tricky, the way it uses the same fields for 275 different meanings. If we had objects, we could subclass. It works 276 okay, but is just a PITA for debugging. 277 */ 278 279 /** 280 listitem item_t 281 \brief tricky data structure used for a list of 'items' 282 283 Meanings of val fields are dependent on the particular list -- 284 See #defines below for examples. 285 */ 286 struct listitem { 287 int val; 288 int val2; 289 int val3; 290 char *str; /**< primary key for list-element */ 291 void *buf; /**< alias, extra data, whatever */ 292 }; 293 typedef struct listitem item_t; 294 295 296 /** 297 Defines for the list val fields 298 */ 299 #define seqNo val 300 #define foundTool val 301 #define refCount val 302 #define num val 303 #define bStart val 304 #define iFlag val 305 #define ssComp val2 306 #define isProcessed val2 307 #define iLevel val2 308 #define nMatch val2 309 #define bLen val2 310 #define bDocLen val3 311 #define bIndex val3 312 #define bList buf 313 314 /** 315 list 316 \brief list_t type structure used to keep various lists. (e.g. there are 317 multiple lists). 318 319 */ 320 struct list { 321 char name[64]; /**< Name of the list */ 322 int used; /**< Number of items found, 0 is empty list */ 323 int size; /**< What size is this? (MD) */ 324 int ix; /**< The index for the items below */ 325 int sorted; /**< Flag to indicate how ?? (the list or the items in the 326 list?) things are sorted: SORT_BY_NAME or 327 SORT_BY_NAME_ICASE */ 328 int desc; /**< Description */ 329 item_t *items; /**< List items */ 330 }; 331 typedef struct list list_t; 332 333 /** 334 * Search string 335 */ 336 struct searchString { 337 int csLen; ///< String length 338 char *csData; ///< String data 339 }; 340 typedef struct searchString searchString_t; 341 342 /** 343 * License specification 344 */ 345 struct licenseSpec { 346 searchString_t seed; ///< License seed 347 searchString_t text; ///< License text 348 }; 349 typedef struct licenseSpec licSpec_t; 350 351 352 /** 353 \brief Structure holding data truly global in that it remains consistent 354 for each file scanned. 355 */ 356 struct globals { 357 char initwd[myBUFSIZ]; ///< CDB, would like to workaround/eliminate. 358 char progName[64]; ///< Program name 359 int progOpts; ///< CLI options 360 int flags; ///< Flags 361 int uPsize; ///< Size 362 #ifdef GLOBAL_DEBUG 363 int DEEBUG; 364 int MEM_DEEBUG; 365 #endif /* GLOBAL_DEBUG */ 366 #ifdef PROC_TRACE_SWITCH 367 int ptswitch; 368 #endif /* PROC_TRACE_SWITCH */ 369 list_t sHash; ///< Hashes 370 /* Agent-specific Things */ 371 int agentPk; ///< Agent id 372 long uploadFk; ///< Upload id 373 int arsPk; ///< Agent ars id 374 PGconn *pgConn; ///< DB Connection 375 fo_dbManager *dbManager; ///< FOSSology DB manager 376 }; 377 378 /** 379 * License match positions and license type 380 */ 381 typedef struct { 382 int start; ///< Start position of match 383 int end; ///< End position of match 384 int index; ///< Enums from index (Entrynumber) in STRINGS.in 385 } MatchPositionAndType; 386 387 /** 388 * License matches 389 */ 390 typedef struct { 391 GArray* matchPositions; ///< Match positions 392 GArray* indexList; ///< License indexes 393 char* licenceName; ///< License names 394 int licenseFileId; ///< PFile id 395 } LicenceAndMatchPositions; 396 397 398 399 /** 400 curScan 401 \brief Struct that tracks state related to current file being scanned. 402 */ 403 struct curScan { 404 char cwd[myBUFSIZ]; /**< CDB, Would like to workaround and eliminate. */ 405 char targetDir[myBUFSIZ]; /**< Directory where file is */ /* check */ 406 char targetFile[myBUFSIZ]; /**< File we're scanning (tmp file)*/ /* check */ 407 char filePath[myBUFSIZ]; /**< the original file path passed in */ 408 long pFileFk; /**< [in] pfile_fk from scheduler */ 409 char pFile[myBUFSIZ]; /**< [in] pfilename from scheduler */ 410 char *licPara; 411 char *matchBase; 412 size_t targetLen; 413 size_t cwdLen; 414 struct stat stbuf; 415 regmatch_t regm; 416 list_t regfList; 417 list_t fLicFoundMap; 418 list_t parseList; 419 list_t offList; 420 list_t lList; 421 char compLic[myBUFSIZ]; /**< the license(s) found, None or NotLikely. 422 comma separated if multiple names are found. */ 423 int nLines; 424 int cliMode; /**< boolean to indicate running from command line */ 425 char *tmpLics; /**< pointer to storage for parsed names */ 426 char *licenseList[512]; /**< list of license names found, can be a single name */ 427 428 GArray* indexList; /**< List of license indexes */ 429 GArray* theMatches; /**< List of matches */ 430 GArray* keywordPositions; /**< List of matche positions */ 431 GArray* docBufferPositionsAndOffsets; 432 int currentLicenceIndex; 433 }; 434 435 /** 436 * License pattern 437 */ 438 struct license { 439 int len; ///< Length of pattern 440 char *patt; ///< License pattern to use 441 }; 442 443 /** 444 * License text to information 445 */ 446 struct licensetext { 447 char *regex; ///< License regex 448 char *tseed; ///< unencrypted license text 449 int nAbove; 450 int nBelow; 451 int compiled; 452 int plain; 453 }; 454 typedef struct licensetext licText_t; 455 456 /** 457 * Get regex of a license text 458 */ 459 #define _REGEX(x) licText[x].regex 460 /** 461 * Get seed of a license text 462 */ 463 #define _SEED(x) licText[x].tseed 464 465 /** 466 * License scan result 467 */ 468 struct scanResults { 469 int score; ///< License match score 470 int kwbm; 471 int size; 472 int flag; ///< Flags 473 int dataOffset; 474 char fullpath[myBUFSIZ]; 475 char linkname[16]; 476 char *licenses; 477 char *relpath; 478 size_t nameOffset; 479 }; 480 typedef struct scanResults scanres_t; 481 482 /** 483 * List-based memory tags 484 */ 485 #define MTAG_UNSORTKEY "list/str (initially-UNsorted key)" 486 #define MTAG_SORTKEY "list/str (initially-sorted key)" 487 #define MTAG_LISTKEY "list/str (sorted/unsorted key)" 488 #define MTAG_REPLKEY "list/str (replaced primary key)" 489 #define MTAG_LISTBUF "list/buf (any data)" 490 #define MTAG_PATHBASE "list/buf (path basename)" 491 #define MTAG_PKGINFO "list/buf (pkg rname/type/name/vers/lic)" 492 #define MTAG_PKG_NV "list/buf (pkg name/vers)" 493 #define MTAG_MD5SUM "list/buf (distro-arch MD5SUM)" 494 #define MTAG_COUNTER "list/buf integer (counter)" 495 #define MTAG_PKGNAME "list/buf (package-name)" 496 #define MTAG_PKGVERS "list/buf (package-vers)" 497 #define MTAG_CLAIMLIC "list/buf (claimlic copy)" 498 #define MTAG_COMPLIC "list/buf (pkg compLic copy)" 499 #define MTAG_URLCOPY "list/buf (pkg URL copy)" 500 #define MTAG_FILELIC "list/buf (file-license copy)" 501 #define MTAG_FIXNAME "list/buf (fixed-package name)" 502 /** 503 * Miscellaneous memory tags 504 */ 505 #define MTAG_SEEDTEXT "search-seed text" 506 #define MTAG_SRCHTEXT "license-search text" 507 #define MTAG_MMAPFILE "mmap-file data" 508 #define MTAG_MAGICDATA "file magic description" 509 #define MTAG_PATTRS "pkg-attr buffer" 510 #define MTAG_DOUBLED "doubled (reallocated) data" 511 #define MTAG_SEARCHBUF "initial search-data buffer" 512 #define MTAG_TOOSMALL "too-small half-size buffer" 513 #define MTAG_TEXTPARA "paragraph text" 514 #define MTAG_LIST "dynamically-allocated list" 515 #define MTAG_ENV "environment variable" 516 #define MTAG_SCANRES "scan-results list" 517 518 519 /* 520 Functions defined in nomos.c, used in other files 521 */ 522 void Bail(int exitval); 523 int optionIsSet(int val); 524 525 /* 526 Global Declarations 527 */ 528 extern struct globals gl; 529 extern struct curScan cur; 530 extern licText_t licText[]; 531 extern licSpec_t licSpec[]; 532 extern int schedulerMode; /* Non-zero if being run by scheduler */ 533 534 /** 535 Declarations for using the memory debug stuff 536 */ 537 #ifdef MEMORY_TRACING 538 char *memAllocTagged(); 539 void memFreeTagged(); 540 #define memFree(x,y) memFreeTagged(x, y) 541 #define memAlloc(x,y) memAllocTagged(x, y) 542 #else /* NOT MEMORY_TRACING */ 543 #define memFree(x,/*notused*/y) free(x) 544 #define memAlloc(x,y) calloc(x, 1) 545 #endif /* NOT MEMORY_TRACING */ 546 547 /* 548 * Macros for timing - refer to findPhrase() for usage examples 549 */ 550 /* need TIMING_DECL in the declarations section of function */ 551 #define DECL_TIMER struct timeval bTV, eTV; float proctime 552 #define ZERO_TIMER memcpy((void *) &bTV, (void *) &eTV, sizeof(eTV)) 553 #define RESET_TIMER END_TIMER; ZERO_TIMER 554 #define START_TIMER RECORD_TIMER(bTV) 555 #define END_TIMER RECORD_TIMER(eTV) ; \ 556 proctime = (float) (eTV.tv_sec - bTV.tv_sec) + \ 557 ((float) (eTV.tv_usec - bTV.tv_usec) * 0.000001) 558 #define RECORD_TIMER(x) (void) gettimeofday(&x, (struct timezone *) NULL) 559 #define PRINT_TIMER(x,y) printf("%11.6f seconds: %s\n", proctime, x); \ 560 if (y) { DUMP_TIMERS; } 561 #define DUMP_TIMERS printf("[1]: %d.%06d\n", bTV.tv_sec, bTV.tv_usec); \ 562 printf("[2]: %d.%06d\n", eTV.tv_sec, eTV.tv_usec) 563 564 565 /* 566 * Cut-and-paste this stuff to turn on timing 567 */ 568 #if 0 569 #ifdef TIMING 570 DECL_TIMER; /* timer declaration */ 571 #endif 572 /* */ 573 #ifdef TIMING 574 START_TIMER; /* turn on the timer */ 575 #endif /* TIMING */ 576 /* */ 577 #ifdef TIMING 578 END_TIMER; /* stop the timer */ 579 PRINT_TIMER("unpack", 0); /* ... and report */ 580 START_TIMER; /* optionally re-start timer */ 581 #endif /* TIMING */ 582 #endif 583 584 #endif /* _NOMOS_H */ 585