1#!/usr/local/bin/python3.8 2 3from __future__ import division 4 5"""flawfinder: Find potential security flaws ("hits") in source code. 6 Usage: 7 flawfinder [options] [source_code_file]+ 8 9 See the man page for a description of the options.""" 10 11version="1.31" 12 13# The default output is as follows: 14# filename:line_number [risk_level] (type) function_name: message 15# where "risk_level" goes from 0 to 5. 0=no risk, 5=maximum risk. 16# The final output is sorted by risk level, most risky first. 17# Optionally ":column_number" can be added after the line number. 18# 19# Currently this program can only analyze C/C++ code. 20# 21# Copyright (C) 2001-2014 David A. Wheeler. 22# This is released under the 23# GNU General Public License (GPL) version 2 or later (GPLv2+): 24# 25# This program is free software; you can redistribute it and/or modify 26# it under the terms of the GNU General Public License as published by 27# the Free Software Foundation; either version 2 of the License, or 28# (at your option) any later version. 29# 30# This program is distributed in the hope that it will be useful, 31# but WITHOUT ANY WARRANTY; without even the implied warranty of 32# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 33# GNU General Public License for more details. 34# 35# You should have received a copy of the GNU General Public License 36# along with this program; if not, write to the Free Software 37# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 38 39# The Python developers did a *terrible* job when they transitioned 40# to Python version 3, as I have documented elsewhere. What's more, 41# many mechanisms that do exist do not come natively with Python 2, or 42# require version 2.6 or later (yet older versions are still in use). 43# For example, this requires Python version 2.6: 44# from __future__ import print_function 45# As a result, many Python programs (including this one) do not use Python 3. 
46# The solution used here is to gradually transition the Python code 47# to Python 2 code that works across 2.2 through 2.7. A preference is 48# given to code constructs that would ALSO work in version 3.X, but 49# only if they would *already* work in Python 2, and only if they don't 50# make the code too complicated. 51 52# The plan is to eventually switch this code so that it runs unchanged 53# on both 2.X and 3.X, but that is hard to support if the version number 54# is less than 2.6, so we won't do that at this time. Instead, we'll 55# wait until versions lower than 2.6 are a distant memory, and then 56# start in that direction. Hopefully by then the developers of Python 57# will begin to make it easy to transition to newer versions of Python. 58 59import sys, re, string, getopt 60import pickle # To support load/save/diff of hitlist 61import os, glob, operator # To support filename expansion on Windows 62import os.path 63import time 64# import formatter 65 66# Program Options - these are the default values: 67show_context = 0 68minimum_level = 1 69show_immediately = 0 70show_inputs = 0 # Only show inputs? 71falsepositive = 0 # Work to remove false positives? 72allowlink = 0 # Allow symbolic links? 73skipdotdir = 1 # If 1, don't recurse into dirs beginning with "." 74 # Note: This doesn't affect the command line. 75num_links_skipped = 0 # Number of links skipped. 76num_dotdirs_skipped = 0 # Number of dotdirs skipped. 77show_columns = 0 78never_ignore = 0 # If true, NEVER ignore problems, even if directed. 79list_rules = 0 # If true, list the rules (helpful for debugging) 80patch_file = "" # File containing (unified) diff output. 81loadhitlist = None 82savehitlist = None 83diffhitlist = None 84quiet = 0 85showheading = 1 # --dataonly turns this off 86output_format = 0 # 0 = normal, 1 = html. 
single_line = 0        # 1 = singleline (can't be 0 if html)
omit_time = 0          # 1 = omit time-to-run (needed for testing)
required_regex = None  # If non-None, regex that must be met to report
required_regex_compiled = None

displayed_header = 0   # Have we displayed the header yet?
num_ignored_hits = 0   # Number of ignored hits (used if never_ignore==0)


def error(message):
    """Write 'Error: <message>' followed by a newline to standard error."""
    line = "Error: %s\n" % message
    sys.stderr.write(line)


# Support routines: find a pattern.
# To simplify the calling convention, several global variables are used
# and these support routines are defined, in an attempt to make the
# actual calls simpler and clearer.
#

filename = ""            # Source filename.
linenumber = 0           # Linenumber from original file.
ignoreline = -1          # Line number to ignore.
sumlines = 0             # Number of lines (total) examined.
sloc = 0                 # Physical SLOC
starttime = time.time()  # Used to determine analyzed lines/second.


line_beginning = re.compile(r'(?m)^')
blank_line = re.compile(r'(?m)^\s+$')


def print_warning(message):
    """Write 'Warning: <message>' and a newline to standard error, then flush.

    Only plain write() calls are used (no print), so this works on
    Python version 2.5 through Python 3.
    """
    stream = sys.stderr
    for piece in ("Warning: ", message, "\n"):
        stream.write(piece)
    stream.flush()


# The following code accepts unified diff format from both subversion (svn)
# and GNU diff, which aren't well-documented.  It gets filenames from
# "Index:" if exists, else from the "+++ FILENAME ..." entry.
# Note that this is different than some tools (which will use "+++" in
# preference to "Index:"), but subversion's nonstandard format is easier
# to handle this way.
# Since they aren't well-documented, here's some info on the diff formats:
# GNU diff format:
#    --- OLDFILENAME OLDTIMESTAMP
#    +++ NEWFILENAME NEWTIMESTAMP
#    @@ -OLDSTART,OLDLENGTH +NEWSTART,NEWLENGTH @@
#    ... Changes where preceding "+" is add, "-" is remove, " " is unchanged.
#
# ",OLDLENGTH" and ",NEWLENGTH" are optional (they default to 1).
# GNU unified diff format doesn't normally output "Index:"; you use
# the "+++/---" to find them (presuming the diff user hasn't used --label
# to mess it up).
#
# Subversion format:
#    Index: FILENAME
#    --- OLDFILENAME (comment)
#    +++ NEWFILENAME (comment)
#    @@ -OLDSTART,OLDLENGTH +NEWSTART,NEWLENGTH @@
#
# In subversion, the "Index:" always occurs, and note that paren'ed
# comments are in the oldfilename/newfilename, NOT timestamps like
# everyone else.
#
# Git format:
#    diff --git a/junk.c b/junk.c
#    index 03d668d..5b005a1 100644
#    --- a/junk.c
#    +++ b/junk.c
#    @@ -6,4 +6,5 @@ main() {
#
# Single Unix Spec version 3 (http://www.unix.org/single_unix_specification/)
# does not specify unified format at all; it only defines the older
# (obsolete) context diff format.  That format DOES use "Index:", but
# only when the filename isn't specified otherwise.
# We're only supporting unified format directly; if you have an older diff
# format, use "patch" to apply it, and then use "diff -u" to create a
# unified format.
#

# "Index: FILENAME" line (subversion).
diff_index_filename = re.compile(r'^Index:\s+(?P<filename>.*)')
# "diff --git a/OLD b/NEW" line (git); the name after " b/" is captured.
diff_git_filename = re.compile(r'^diff --git a/.* b/(?P<filename>.*)$')
# "+++ NEWFILENAME ..." line; the capture still includes any trailing
# timestamp or "(comment)".
diff_newfile = re.compile(r'^\+\+\+\s(?P<filename>.*)$')
# Hunk header; captures the first line number of the hunk in the NEW file.
# The pattern is deliberately NOT anchored at the end of the line: git (and
# "diff -p") append context after the closing "@@", as in the
# "@@ -6,4 +6,5 @@ main() {" example above, and such headers must match too.
diff_hunk = re.compile(r'^@@ -\d+(,\d+)?\s+\+(?P<linenumber>\d+)[, ].*@@')
# Added / removed content lines.  The second character must not repeat the
# marker, so the "+++" and "---" file header lines are excluded.
diff_line_added = re.compile(r'^\+[^+].*')
diff_line_del = re.compile(r'^-[^-].*')
# The "+++" newfile entries have the filename, followed by a timestamp
# or " (comment)" appended.
# Timestamps can be of these forms:
#   2005-04-24 14:21:39.000000000 -0400
#   Mon Mar 10 15:13:12 1997
# Also, "newfile" can have " (comment)" appended.  Find and eliminate this.
# Note that the expression below is Y10K (and Y100K) ready. :-).
# Splits a "+++" filename from the trailing timestamp or "(comment)".
# Group 'filename' is the clean name.  The three alternatives are an ISO-style
# timestamp, a "Mon Mar 10 15:13:12 1997" style timestamp, and " (comment)".
diff_findjunk = re.compile(
    r'^(?P<filename>.*)('
    r'(\s\d\d\d\d+-\d\d-\d\d\s+\d\d:\d[0-9:.]+Z?(\s+[\-\+0-9A-Z]+)?)|'
    r'(\s[A-Za-z][a-z]+\s[A-Za-z][a-z]+\s\d+\s\d+:\d[0-9:.]+Z?(\s[\-\+0-9]*)?\s\d\d\d\d+)|'
    r'(\s\(.*\)))\s*$')


def is_svn_diff(sLine):
    """Return True if sLine contains "Index:" (subversion-style patch)."""
    return sLine.find('Index:') != -1


def is_gnu_diff(sLine):
    """Return True if sLine starts with "--- " (GNU unified diff)."""
    return sLine.startswith('--- ')


def is_git_diff(sLine):
    """Return True if sLine starts with "diff --git a" (git patch)."""
    return sLine.startswith('diff --git a')


def svn_diff_get_filename(sLine):
    """Match an "Index: FILENAME" line; return the match object or None."""
    return diff_index_filename.match(sLine)


def gnu_diff_get_filename(sLine):
    """Match a "+++ FILENAME <junk>" line; return the match object or None.

    The returned match comes from diff_findjunk, so its 'filename' group has
    the trailing timestamp and/or "(comment)" removed.  None is returned when
    the line is not a "+++" line or when no trailing junk is recognised.
    """
    newfile_match = diff_newfile.match(sLine)
    if newfile_match:
        patched_filename = newfile_match.group('filename').strip()
        # Clean up filename - remove trailing timestamp and/or (comment).
        return diff_findjunk.match(patched_filename)
    return None


git_splitter = ' b/'
len_git_splitter = len(git_splitter)


def git_diff_get_filename(sLine):
    """Match a "diff --git a/X b/Y" line; return the match object or None."""
    return diff_git_filename.match(sLine)


# The three helpers below reproduce the Python 2 "print" statement on
# sys.stdout, including its "softspace" bookkeeping, using only write().
# They exist so this code also parses under Python 3 (where the print
# statement is a syntax error) without changing the text it emits, and so
# it interoperates with print statements elsewhere in the file on Python 2.

def _set_softspace(stream, value):
    # Best effort: some stream objects do not allow new attributes.
    try:
        stream.softspace = value
    except (AttributeError, TypeError):
        pass


def _print_item(value):
    """Equivalent of the Python 2 statement "print value," (no newline)."""
    stream = sys.stdout
    is_text = hasattr(value, "isspace")
    if is_text:
        text = value
    else:
        text = str(value)
    if getattr(stream, "softspace", 0):
        stream.write(" ")
    stream.write(text)
    last = text[-1:]
    # Python 2 rule: a separating space is owed before the next item unless
    # this item was a string ending in whitespace other than a blank.
    if is_text and last and last != " " and last.isspace():
        _set_softspace(stream, 0)
    else:
        _set_softspace(stream, 1)


def _print_newline():
    """Equivalent of a bare Python 2 "print": end the current output line."""
    stream = sys.stdout
    stream.write("\n")
    _set_softspace(stream, 0)


def _print_raw(text):
    """Equivalent of sys.stdout.write(text) on a Python 2 file object.

    A raw write cancels any pending separator space, exactly as writing to
    a real Python 2 file does.
    """
    stream = sys.stdout
    stream.write(text)
    _set_softspace(stream, 0)


def load_patch_info(patch_file):
    """Return {filename: {line_number: True}} for lines added by a patch.

    patch_file is the name of a unified diff (subversion, git or GNU).  For
    each file named in the patch, the result records the line numbers (in the
    new, patched file) of every added line, plus the line just before and
    just after it.  The nested dictionaries are used as sets for quick
    lookup later.

    The patch flavour is guessed from the first line only.  The process
    exits with status 1 if the file cannot be opened, the format is not
    recognised, a filename occurs twice, or a hunk header appears before any
    filename.
    """
    try:
        hPatch = open(patch_file, 'r')
    except (IOError, OSError):
        _print_item("Error: failed to open")
        _print_item(h(patch_file))
        _print_newline()
        sys.exit(1)

    patch = {}
    line_counter = 0      # Lines seen so far in the new-file side of the hunk.
    initial_number = 0    # First new-file line number of the current hunk.
    patched_filename = ""  # Name of new file patched by current hunk.

    try:  # try/finally so the file is closed on every exit path.
        sLine = hPatch.readline()
        # Heuristic to determine if it's a svn diff, git diff, or a GNU diff.
        if is_svn_diff(sLine):
            fn_get_filename = svn_diff_get_filename
        elif is_git_diff(sLine):
            fn_get_filename = git_diff_get_filename
        elif is_gnu_diff(sLine):
            fn_get_filename = gnu_diff_get_filename
        else:
            _print_item("Error: Unrecognized patch format")
            _print_newline()
            sys.exit(1)

        # The first line is non-empty here (a format was recognised), so
        # this processes every line and stops when readline() returns ''.
        while sLine:
            filename_match = fn_get_filename(sLine)
            if filename_match:
                patched_filename = filename_match.group('filename').strip()
                if patched_filename in patch:
                    error("filename occurs more than once in the patch: %s" %
                          patched_filename)
                    sys.exit(1)
                patch[patched_filename] = {}
            else:
                hunk_match = diff_hunk.match(sLine)
                if hunk_match:
                    if patched_filename == "":
                        error("wrong type of patch file : we have a line number without having seen a filename")
                        sys.exit(1)
                    initial_number = int(hunk_match.group('linenumber'))
                    line_counter = 0
                elif diff_line_added.match(sLine):
                    line_added = line_counter + initial_number
                    added_lines = patch[patched_filename]
                    added_lines[line_added] = True
                    # Let's also warn about the lines above and below this
                    # one, so that errors that "leak" into adjacent lines are
                    # caught.  Besides, if you're creating a patch, you had
                    # to at least look at adjacent lines, so you're in a
                    # position to fix them.
                    added_lines[line_added - 1] = True
                    added_lines[line_added + 1] = True
                    line_counter += 1
                elif diff_line_del.match(sLine) is None:
                    # Anything that is not a removed line advances the
                    # position in the new file.
                    line_counter += 1

            sLine = hPatch.readline()
    finally:
        hPatch.close()

    return patch


def htmlize(s):
    """Return s with the HTML metacharacters &, < and > escaped.

    "&" is replaced first so the ampersands introduced by the other two
    replacements are not escaped a second time.
    """
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def h(s):
    """htmlize s if we're generating html, otherwise just return s."""
    if output_format:
        return htmlize(s)
    return s


def print_multi_line(text):
    """Print the words of text on stdout as indented lines under 78 columns.

    No newline is written after the final word; the caller ends the line.
    """
    width = 78
    prefix = " "
    starting_position = len(prefix) + 1
    #
    _print_item(prefix)
    position = starting_position
    #
    for w in text.split():
        if len(w) + position >= width:
            _print_newline()
            _print_item(prefix)
            position = starting_position
        _print_item(w)
        position = position + len(w) + 1


# This matches references to CWE identifiers, so we can HTMLize them.
# We don't refer to CWEs with one digit, so we'll only match on 2+ digits.
link_cwe_pattern = re.compile(r'(CWE-([1-9][0-9]+))([,()])')


class Hit:
    """
    Each instance of Hit is a warning of some kind in a source code file.
    See the rulesets, which define the conditions for triggering a hit.
    Hit is initialized with a tuple containing the following:
      hook: function to call when function name found.
      level: (default) warning level, 0-5. 0=no problem, 5=very risky.
      warning: warning (text saying what's the problem)
      suggestion: suggestion (text suggesting what to do instead)
      category: One of "buffer" (buffer overflow), "race" (race condition),
                "tmpfile" (temporary file creation), "format" (format string).
                Use "" if you don't have a better category.
      url: URL fragment reference.
      other: A dictionary with other settings.

    Other settings usually set:

      name: function name
      parameter: the function parameters (0th parameter null)
      input: set to 1 if the function inputs from external sources.
      start: start position (index) of the function name (in text)
      end:  end position of the function name (in text)
      filename: name of file
      line: line number in file
      column: column in line in file
      context_text: text surrounding hit"""

    # Set default values:
    source_position = 2    # By default, the second parameter is the source.
    format_position = 1    # By default, the first parameter is the format.
    input = 0              # By default, this doesn't read input.
    note = ""              # No additional notes.
    filename = ""          # Empty string is filename.
    extract_lookahead = 0  # Normally don't extract lookahead.

    def __init__(self, data):
        hook, level, warning, suggestion, category, url, other = data
        self.hook, self.level = hook, level
        self.warning, self.suggestion = warning, suggestion
        self.category, self.url = category, url
        # These will be set later, but I set them here so that
        # analysis tools like PyChecker will know about them.
        self.column = 0
        self.line = 0
        self.name = ""
        self.context_text = ""
        # Every entry of "other" becomes (or overrides) an attribute.
        for key in other:
            setattr(self, key, other[key])

    def __cmp__(self, other):
        # Ordering used when sorting hits.  NOTE: __cmp__ and cmp() are
        # Python 2 features; Python 3 does not call this method.
        return (cmp(other.level, self.level) or  # Highest risk first.
                cmp(self.filename, other.filename) or
                cmp(self.line, other.line) or
                cmp(self.column, other.column) or
                cmp(self.name, other.name))

    def __getitem__(self, X):  # Define this so this works: "%(line)" % hit
        return getattr(self, X)

    def show(self):
        """Write this hit to stdout as text, or as HTML if output_format is set."""
        if output_format:
            _print_item("<li>")
        _print_raw(h(self.filename))

        if show_columns:
            _print_item(":%(line)s:%(column)s:" % self)
        else:
            _print_item(":%(line)s:" % self)

        if output_format:
            _print_item("<b>")
        # Extra space before risk level in text, makes it easier to find:
        _print_item(" [%(level)s]" % self)
        if output_format:
            _print_item("</b>")
        _print_item("(%(category)s)" % self)
        if output_format:
            _print_item("<i>")
        _print_item(h("%(name)s:" % self))
        main_text = h("%(warning)s. " % self)
        if output_format:  # Create HTML link to CWE definitions
            main_text = link_cwe_pattern.sub(
                r'<a href="http://cwe.mitre.org/data/definitions/\2.html">\1</a>\3',
                main_text)
        if single_line:
            _print_item(main_text)
            if self.suggestion:
                _print_item(h(self.suggestion) + ".")
            _print_item(h(self.note))
        else:
            if self.suggestion:
                main_text = main_text + h(self.suggestion) + ". "
            main_text = main_text + h(self.note)
            _print_newline()
            print_multi_line(main_text)
        if output_format:
            _print_item("</i>")
        _print_newline()
        if show_context:
            if output_format:
                _print_item("<pre>")
                _print_newline()
            _print_item(h(self.context_text))
            _print_newline()
            if output_format:
                _print_item("</pre>")
                _print_newline()


# The "hitlist" is the list of all hits (warnings) found so far.
# Use add_warning to add to it.

hitlist = []


def add_warning(hit):
    """Record hit in hitlist unless the current option settings filter it out.

    A hit is dropped if only input functions are wanted and it is not one,
    if its warning text fails the required regex, or if its level is below
    minimum_level.  A hit on the line marked to be ignored is counted in
    num_ignored_hits instead of being recorded.
    """
    global hitlist, num_ignored_hits
    if show_inputs and not hit.input:
        return
    if required_regex and (required_regex_compiled.search(hit.warning) is None):
        return
    if hit.level >= minimum_level:
        if linenumber == ignoreline:
            num_ignored_hits = num_ignored_hits + 1
        else:
            hitlist.append(hit)
            if show_immediately:
                hit.show()


def internal_warn(message):
    """Print an internal diagnostic on stdout (HTML-escaped in HTML mode)."""
    _print_item(h(message))
    _print_newline()


# C Language Specific

def extract_c_parameters(text, pos=0):
    """Return a list of the given C function's parameters, starting at text[pos].

    Entry 0 of the result is always "", so the first real parameter is
    result[1]: '(a,b)' produces ['', 'a', 'b'].

    Returns [] if the first non-whitespace character at or after pos is not
    "(".  If a ";" is met before the closing ")", a diagnostic is printed and
    the parameters collected so far are returned.  If the text ends before
    the closing ")", a diagnostic is printed and None is returned; callers
    such as c_sprintf test for None.
    """
    i = pos
    # Skip whitespace and find the "("; if there isn't one, return []:
    while i < len(text):
        if text[i] == '(':
            break
        elif text[i] in string.whitespace:
            i = i + 1
        else:
            return []
    else:  # Never found a reasonable ending.
        return []
    i = i + 1
    parameters = [""]  # Insert 0th entry, so 1st parameter is parameter[1].
    currentstart = i
    parenlevel = 1
    instring = 0  # 1=in double-quote, 2=in single-quote
    incomment = 0
    while i < len(text):
        c = text[i]
        if instring:
            if c == '"' and instring == 1:
                instring = 0
            elif c == "'" and instring == 2:
                instring = 0
            # if \, skip next character too.  The C/C++ rules for
            # \ are actually more complex, supporting \ooo octal and
            # \xhh hexadecimal (which can be shortened), but we don't need to
            # parse that deeply, we just need to know we'll stay in string mode:
            elif c == '\\':
                i = i + 1
        elif incomment:
            if c == '*' and text[i:i+2] == '*/':
                incomment = 0
                i = i + 1
        else:
            if c == '"':
                instring = 1
            elif c == "'":
                instring = 2
            elif c == '/' and text[i:i+2] == '/*':
                incomment = 1
                i = i + 1
            elif c == '/' and text[i:i+2] == '//':
                while i < len(text) and text[i] != "\n":
                    i = i + 1
            elif c == '\\' and text[i:i+2] == '\\"':
                i = i + 1  # Handle exposed '\"'
            elif c == '(':
                parenlevel = parenlevel + 1
            elif c == ',' and (parenlevel == 1):
                parameters.append(
                    p_trailingbackslashes.sub('', text[currentstart:i]).strip())
                currentstart = i + 1
            elif c == ')':
                parenlevel = parenlevel - 1
                if parenlevel <= 0:
                    parameters.append(
                        p_trailingbackslashes.sub('', text[currentstart:i]).strip())
                    return parameters
            elif c == ';':
                internal_warn("Parsing failed to find end of parameter list; "
                              "semicolon terminated it in %s" % text[pos:pos+200])
                return parameters
        i = i + 1
    internal_warn("Parsing failed to find end of parameter list in %s" %
                  text[pos:pos+200])
    return None


# These patterns match gettext() and _() for internationalization.
# This is compiled here, to avoid constant recomputation.
505# FIXME: assumes simple function call if it ends with ")", 506# so will get confused by patterns like gettext("hi") + function("bye") 507# In practice, this doesn't seem to be a problem; gettext() is usually 508# wrapped around the entire parameter. 509# The ?s makes it posible to match multi-line strings. 510gettext_pattern = re.compile(r'(?s)^\s*' + 'gettext' + r'\s*\((.*)\)\s*$') 511undersc_pattern = re.compile(r'(?s)^\s*' + '_(T(EXT)?)?' + r'\s*\((.*)\)\s*$') 512 513def strip_i18n(text): 514 "Strip any internationalization function calls surrounding 'text', " 515 "such as gettext() and _()." 516 match = gettext_pattern.search(text) 517 if match: return string.strip(match.group(1)) 518 match = undersc_pattern.search(text) 519 if match: return string.strip(match.group(3)) 520 return text 521 522p_trailingbackslashes = re.compile( r'(\s|\\(\n|\r))*$') 523 524p_c_singleton_string = re.compile( r'^\s*L?"([^\\]|\\[^0-6]|\\[0-6]+)?"\s*$') 525 526def c_singleton_string(text): 527 "Returns true if text is a C string with 0 or 1 character." 528 if p_c_singleton_string.search(text): return 1 529 else: return 0 530 531# This string defines a C constant. 532p_c_constant_string = re.compile( r'^\s*L?"([^\\]|\\[^0-6]|\\[0-6]+)*"$') 533 534def c_constant_string(text): 535 "Returns true if text is a constant C string." 536 if p_c_constant_string.search(text): return 1 537 else: return 0 538 539 540# Precompile patterns for speed. 541 542 543def c_buffer(hit): 544 source_position = hit.source_position 545 if source_position <= len(hit.parameters)-1: 546 source=hit.parameters[source_position] 547 if c_singleton_string(source): 548 hit.level = 1 549 hit.note = "Risk is low because the source is a constant character." 550 elif c_constant_string(strip_i18n(source)): 551 hit.level = max( hit.level - 2, 1) 552 hit.note = "Risk is low because the source is a constant string." 
553 add_warning(hit) 554 555 556p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + 557 r'\s*(\)\s*)?(-\s*1\s*)?$') 558# This is a heuristic: constants in C are usually given in all 559# upper case letters. Yes, this need not be true, but it's true often 560# enough that it's worth using as a heuristic. 561# We check because strncat better not be passed a constant as the length! 562p_looks_like_constant = re.compile(r'^\s*[A-Z][A-Z_$0-9]+\s*(-\s*1\s*)?$') 563 564def c_strncat(hit): 565 if len(hit.parameters) > 3: 566 # A common mistake is to think that when calling strncat(dest,src,len), 567 # that "len" means the ENTIRE length of the destination. This isn't true, 568 # it must be the length of the characters TO BE ADDED at most. 569 # Which is one reason that strlcat is better than strncat. 570 # We'll detect a common case of this error; if the length parameter 571 # is of the form "sizeof(dest)", we have this error. 572 # Actually, sizeof(dest) is okay if the dest's first character is always \0, 573 # but in that case the programmer should use strncpy, NOT strncat. 574 # The following heuristic will certainly miss some dangerous cases, but 575 # it at least catches the most obvious situation. 576 # This particular heuristic is overzealous; it detects ANY sizeof, instead 577 # of only the sizeof(dest) (where dest is given in hit.parameters[1]). 578 # However, there aren't many other likely candidates for sizeof; some 579 # people use it to capture just the length of the source, but this is 580 # just as dangerous, since then it absolutely does NOT take care of 581 # the destination maximum length in general. 582 # It also detects if a constant is given as a length, if the 583 # constant follows common C naming rules. 
584 length_text=hit.parameters[3] 585 if p_dangerous_strncat.search(length_text) or p_looks_like_constant.search(length_text): 586 hit.level = 5 587 hit.note = ( "Risk is high; the length parameter appears to be a constant, " + 588 "instead of computing the number of characters left.") 589 add_warning(hit) 590 return 591 c_buffer(hit) 592 593def c_printf(hit): 594 format_position = hit.format_position 595 if format_position <= len(hit.parameters)-1: 596 # Assume that translators are trusted to not insert "evil" formats: 597 source = strip_i18n(hit.parameters[format_position]) 598 if c_constant_string(source): 599 # Parameter is constant, so there's no risk of format string problems. 600 if hit.name == "snprintf" or hit.name == "vsnprintf": 601 hit.level = 1 602 hit.warning = \ 603 "On some very old systems, snprintf is incorrectly implemented " \ 604 "and permits buffer overflows; there are also incompatible " \ 605 "standard definitions of it" 606 hit.suggestion = "Check it during installation, or use something else" 607 hit.category = "port" 608 else: 609 # We'll pass it on, just in case it's needed, but at level 0 risk. 610 hit.level = 0 611 hit.note = "Constant format string, so not considered very risky (there's some residual risk, especially in a loop)." 612 add_warning(hit) 613 614 615p_dangerous_sprintf_format = re.compile(r'%-?([0-9]+|\*)?s') 616 617# sprintf has both buffer and format vulnerabilities. 618def c_sprintf(hit): 619 source_position = hit.source_position 620 if hit.parameters is None: 621 # Serious parameter problem, e.g., none, or a string constant that 622 # never finishes. 623 hit.warning = "format string parameter problem" 624 hit.suggestion = "Check if required parameters present and quotes close." 
625 hit.level = 4 626 hit.category = "format" 627 hit.url = "" 628 elif source_position <= len(hit.parameters)-1: 629 source=hit.parameters[source_position] 630 if c_singleton_string(source): 631 hit.level = 1 632 hit.note = "Risk is low because the source is a constant character." 633 else: 634 source = strip_i18n(source) 635 if c_constant_string(source): 636 if not p_dangerous_sprintf_format.search(source): 637 hit.level = max( hit.level - 2, 1) 638 hit.note = "Risk is low because the source has a constant maximum length." 639 # otherwise, warn of potential buffer overflow (the default) 640 else: 641 # Ho ho - a nonconstant format string - we have a different problem. 642 hit.warning = "Potential format string problem (CWE-134)" 643 hit.suggestion = "Make format string constant" 644 hit.level = 4 645 hit.category = "format" 646 hit.url = "" 647 add_warning(hit) 648 649p_dangerous_scanf_format = re.compile(r'%s') 650p_low_risk_scanf_format = re.compile(r'%[0-9]+s') 651 652def c_scanf(hit): 653 format_position = hit.format_position 654 if format_position <= len(hit.parameters)-1: 655 # Assume that translators are trusted to not insert "evil" formats; 656 # it's not clear that translators will be messing with INPUT formats, 657 # but it's possible so we'll account for it. 658 source = strip_i18n(hit.parameters[format_position]) 659 if c_constant_string(source): 660 if p_dangerous_scanf_format.search(source): pass # Accept default. 661 elif p_low_risk_scanf_format.search(source): 662 # This is often okay, but sometimes extremely serious. 663 hit.level = 1 664 hit.warning = "It's unclear if the %s limit in the format string is small enough (CWE-120)" 665 hit.suggestion = "Check that the limit is sufficiently small, or use a different input function" 666 else: 667 # No risky scanf request. 668 # We'll pass it on, just in case it's needed, but at level 0 risk. 669 hit.level = 0 670 hit.note = "No risky scanf format detected." 671 else: 672 # Format isn't a constant. 
673 hit.note = "If the scanf format is influenceable by an attacker, it's exploitable." 674 add_warning(hit) 675 676 677p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + 678 r'\s*(\)\s*)?(-\s*1\s*)?$') 679p_safe_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?' + 680 r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*' + 681 r'\[\s*0\s*\]\)\s*(-\s*1\s*)?$') 682 683def c_multi_byte_to_wide_char(hit): 684 # Unfortunately, this doesn't detect bad calls when it's a #define or 685 # constant set by a sizeof(), but trying to do so would create 686 # FAR too many false positives. 687 if len(hit.parameters)-1 >= 6: 688 num_chars_to_copy=hit.parameters[6] 689 if p_dangerous_multi_byte.search(num_chars_to_copy): 690 hit.level = 5 691 hit.note = ("Risk is high, it appears that the size is given as bytes, but the " + 692 "function requires size as characters.") 693 elif p_safe_multi_byte.search(num_chars_to_copy): 694 # This isn't really risk-free, since it might not be the destination, 695 # or the destination might be a character array (if it's a char pointer, 696 # the pattern is actually quite dangerous, but programmers 697 # are unlikely to make that error). 698 hit.level = 1 699 hit.note = "Risk is very low, the length appears to be in characters not bytes." 700 add_warning(hit) 701 702p_null_text = re.compile(r'^ *(NULL|0|0x0) *$') 703 704def c_hit_if_null(hit): 705 null_position = hit.check_for_null 706 if null_position <= len(hit.parameters)-1: 707 null_text=hit.parameters[null_position] 708 if p_null_text.search(null_text): 709 add_warning(hit) 710 else: 711 return 712 add_warning(hit) # If insufficient # of parameters. 713 714p_static_array = re.compile(r'^[A-Za-z_]+\s+[A-Za-z0-9_$,\s\*()]+\[[^]]') 715 716def c_static_array(hit): 717 # This is cheating, but it does the job for most real code. 718 # In some cases it will match something that it shouldn't. 719 # We don't match ALL arrays, just those of certain types (e.g., char). 
720 # In theory, any array can overflow, but in practice it seems that 721 # certain types are far more prone to problems, so we just report those. 722 if p_static_array.search(hit.lookahead): 723 add_warning(hit) # Found a static array, warn about it. 724 725def normal(hit): 726 add_warning(hit) 727 728 729# "c_ruleset": the rules for identifying "hits" in C (potential warnings). 730# It's a dictionary, where the key is the function name causing the hit, 731# and the value is a tuple with the following format: 732# (hook, level, warning, suggestion, category, {other}) 733# See the definition for class "Hit". 734# The key can have multiple values separated with "|". 735 736c_ruleset = { 737 "strcpy" : 738 (c_buffer, 4, 739 "Does not check for buffer overflows when copying to destination (CWE-120)", 740 "Consider using strcpy_s, strncpy, or strlcpy (warning, strncpy is easily misused)", 741 "buffer", "", {}), 742 "lstrcpy|wcscpy|_tcscpy|_mbscpy" : 743 (c_buffer, 4, 744 "Does not check for buffer overflows when copying to destination (CWE-120)", 745 "Consider using a function version that stops copying at the end of the buffer", 746 "buffer", "", {}), 747 "memcpy|CopyMemory|bcopy" : 748 (normal, 2, # I've found this to have a lower risk in practice. 
749 "Does not check for buffer overflows when copying to destination (CWE-120)", 750 "Make sure destination can always hold the source data", 751 "buffer", "", {}), 752 "strcat" : 753 (c_buffer, 4, 754 "Does not check for buffer overflows when concatenating to destination (CWE-120)", 755 "Consider using strcat_s, strncat, or strlcat (warning, strncat is easily misused)", 756 "buffer", "", {}), 757 "lstrcat|wcscat|_tcscat|_mbscat" : 758 (c_buffer, 4, 759 "Does not check for buffer overflows when concatenating to destination (CWE-120)", 760 "", 761 "buffer", "", {}), 762 "strncpy" : 763 (c_buffer, 764 1, # Low risk level, because this is often used correctly when FIXING security 765 # problems, and raising it to a higher risk level would cause many false positives. 766 "Easily used incorrectly; doesn't always \\0-terminate or " + 767 "check for invalid pointers (CWE-120)", 768 "", 769 "buffer", "", {}), 770 "lstrcpyn|wcsncpy|_tcsncpy|_mbsnbcpy" : 771 (c_buffer, 772 1, # Low risk level, because this is often used correctly when FIXING security 773 # problems, and raising it to a higher risk levle would cause many false positives. 774 "Easily used incorrectly; doesn't always \\0-terminate or " + 775 "check for invalid pointers (CWE-120)", 776 "", 777 "buffer", "", {}), 778 "strncat" : 779 (c_strncat, 780 1, # Low risk level, because this is often used correctly when 781 # FIXING security problems, and raising it to a 782 # higher risk level would cause many false positives. 783 "Easily used incorrectly (e.g., incorrectly computing the correct maximum size to add) (CWE-120)", 784 "Consider strcat_s, strlcat, or automatically resizing strings", 785 "buffer", "", {}), 786 "lstrcatn|wcsncat|_tcsncat|_mbsnbcat" : 787 (c_strncat, 788 1, # Low risk level, because this is often used correctly when FIXING security 789 # problems, and raising it to a higher risk level would cause many false positives. 
790 "Easily used incorrectly (e.g., incorrectly computing the correct maximum size to add) (CWE-120)", 791 "Consider strcat_s, strlcat, or automatically resizing strings", 792 "buffer", "", {}), 793 "strccpy|strcadd": 794 (normal, 1, 795 "Subject to buffer overflow if buffer is not as big as claimed (CWE-120)", 796 "Ensure that destination buffer is sufficiently large", 797 "buffer", "", {}), 798 "char|TCHAR|wchar_t": # This isn't really a function call, but it works. 799 (c_static_array, 2, 800 "Statically-sized arrays can be improperly restricted, " + 801 "leading to potential overflows or other issues (CWE-119:CWE-120)", 802 "Perform bounds checking, use functions that limit length, " + 803 "or ensure that the size is larger than the maximum possible length", 804 "buffer", "", {'extract_lookahead' : 1}), 805 806 "gets|_getts": 807 (normal, 5, "Does not check for buffer overflows (CWE-120, CWE-20)", 808 "Use fgets() instead", "buffer", "", {'input' : 1}), 809 810 # The "sprintf" hook will raise "format" issues instead if appropriate: 811 "sprintf|vsprintf|swprintf|vswprintf|_stprintf|_vstprintf": 812 (c_sprintf, 4, 813 "Does not check for buffer overflows (CWE-120)", 814 "Use sprintf_s, snprintf, or vsnprintf", 815 "buffer", "", {}), 816 817 "printf|vprintf|vwprintf|vfwprintf|_vtprintf|wprintf": 818 (c_printf, 4, 819 "If format strings can be influenced by an attacker, they can be exploited (CWE-134)", 820 "Use a constant for the format specification", 821 "format", "", {}), 822 823 "fprintf|vfprintf|_ftprintf|_vftprintf|fwprintf|fvwprintf": 824 (c_printf, 4, 825 "If format strings can be influenced by an attacker, they can be exploited (CWE-134)", 826 "Use a constant for the format specification", 827 "format", "", { 'format_position' : 2}), 828 829 # The "syslog" hook will raise "format" issues. 
830 "syslog": 831 (c_printf, 4, 832 "If syslog's format strings can be influenced by an attacker, " + 833 "they can be exploited (CWE-134)", 834 "Use a constant format string for syslog", 835 "format", "", { 'format_position' : 2} ), 836 837 "snprintf|vsnprintf|_snprintf|_sntprintf|_vsntprintf": 838 (c_printf, 4, 839 "If format strings can be influenced by an attacker, they can be " + 840 "exploited, and note that sprintf variations do not always \\0-terminate (CWE-134)", 841 "Use a constant for the format specification", 842 "format", "", { 'format_position' : 3}), 843 844 "scanf|vscanf|wscanf|_tscanf|vwscanf": 845 (c_scanf, 4, 846 "The scanf() family's %s operation, without a limit specification, " + 847 "permits buffer overflows (CWE-120, CWE-20)", 848 "Specify a limit to %s, or use a different input function", 849 "buffer", "", {'input' : 1}), 850 851 "fscanf|sscanf|vsscanf|vfscanf|_ftscanf|fwscanf|vfwscanf|vswscanf": 852 (c_scanf, 4, 853 "The scanf() family's %s operation, without a limit specification, " 854 "permits buffer overflows (CWE-120, CWE-20)", 855 "Specify a limit to %s, or use a different input function", 856 "buffer", "", {'input' : 1, 'format_position' : 2}), 857 858 "strlen|wcslen|_tcslen|_mbslen" : 859 (normal, 860 1, # Often this isn't really a risk, and even when, it usually at worst causes 861 # program crash (and nothing worse). 862 "Does not handle strings that are not \\0-terminated; " + 863 "if given one it may perform an over-read (it could cause a crash " + 864 "if unprotected) (CWE-126)", 865 "", 866 "buffer", "", {}), 867 868 "MultiByteToWideChar" : # Windows 869 (c_multi_byte_to_wide_char, 870 2, # Only the default - this will be changed in many cases. 
871 "Requires maximum length in CHARACTERS, not bytes (CWE-120)", 872 "", 873 "buffer", "", {}), 874 875 "streadd|strecpy": 876 (normal, 4, 877 "This function does not protect against buffer overflows (CWE-120)", 878 "Ensure the destination has 4 times the size of the source, to leave room for expansion", 879 "buffer", "dangers-c", {}), 880 881 "strtrns": 882 (normal, 3, 883 "This function does not protect against buffer overflows (CWE-120)", 884 "Ensure that destination is at least as long as the source", 885 "buffer", "dangers-c", {}), 886 887 "realpath": 888 (normal, 3, 889 "This function does not protect against buffer overflows, " + 890 "and some implementations can overflow internally (CWE-120/CWE-785)", 891 "Ensure that the destination buffer is at least of size MAXPATHLEN, and" + 892 "to protect against implementation problems, the input argument should also " + 893 "be checked to ensure it is no larger than MAXPATHLEN", 894 "buffer", "dangers-c", {}), 895 896 "getopt|getopt_long": 897 (normal, 3, 898 "Some older implementations do not protect against internal buffer overflows (CWE-120, CWE-20)", 899 "Check implementation on installation, or limit the size of all string inputs", 900 "buffer", "dangers-c", {'input' : 1}), 901 902 "getpass": 903 (normal, 3, 904 "Some implementations may overflow buffers (CWE-120, CWE-20)", 905 "", 906 "buffer", "dangers-c", {'input' : 1}), 907 908 "getwd": 909 (normal, 3, 910 "This does not protect against buffer overflows " 911 "by itself, so use with caution (CWE-120, CWE-20)", 912 "Use getcwd instead", 913 "buffer", "dangers-c", {'input' : 1}), 914 915 # fread not included here; in practice I think it's rare to mistake it. 916 "getchar|fgetc|getc|read|_gettc": 917 (normal, 1, 918 "Check buffer boundaries if used in a loop including recursive loops (CWE-120, CWE-20)", 919 "", 920 "buffer", "dangers-c", {'input' : 1}), 921 922 "access": # ???: TODO: analyze TOCTOU more carefully. 
923 (normal, 4, 924 "This usually indicates a security flaw. If an " + 925 "attacker can change anything along the path between the " + 926 "call to access() and the file's actual use (e.g., by moving " + 927 "files), the attacker can exploit the race condition (CWE-362/CWE-367)", 928 "Set up the correct permissions (e.g., using setuid()) and " + 929 "try to open the file directly", 930 "race", 931 "avoid-race#atomic-filesystem", {}), 932 "chown": 933 (normal, 5, 934 "This accepts filename arguments; if an attacker " + 935 "can move those files, a race condition results. (CWE-362)", 936 "Use fchown( ) instead", 937 "race", "", {}), 938 "chgrp": 939 (normal, 5, 940 "This accepts filename arguments; if an attacker " + 941 "can move those files, a race condition results. (CWE-362)", 942 "Use fchgrp( ) instead", 943 "race", "", {}), 944 "chmod": 945 (normal, 5, 946 "This accepts filename arguments; if an attacker " + 947 "can move those files, a race condition results. (CWE-362)", 948 "Use fchmod( ) instead", 949 "race", "", {}), 950 "vfork": 951 (normal, 2, 952 "On some old systems, vfork() permits race conditions, and it's " + 953 "very difficult to use correctly (CWE-362)", 954 "Use fork() instead", 955 "race", "", {}), 956 "readlink": 957 (normal, 5, 958 "This accepts filename arguments; if an attacker " + 959 "can move those files or change the link content, " + 960 "a race condition results. " + 961 "Also, it does not terminate with ASCII NUL. (CWE-362, CWE-20)", 962 # This is often just a bad idea, and it's hard to suggest a 963 # simple alternative: 964 "Reconsider approach", 965 "race", "", {'input' : 1}), 966 967 "tmpfile": 968 (normal, 2, 969 "Function tmpfile() has a security flaw on some systems (e.g., older System V systems) (CWE-377)", 970 "", 971 "tmpfile", "", {}), 972 "tmpnam|tempnam": 973 (normal, 3, 974 "Temporary file race condition (CWE-377)", 975 "", 976 "tmpfile", "avoid-race", {}), 977 978 # TODO: Detect GNOME approach to mktemp and ignore it. 
979 "mktemp": 980 (normal, 4, 981 "Temporary file race condition (CWE-377)", 982 "", 983 "tmpfile", "avoid-race", {}), 984 985 "mkstemp": 986 (normal, 2, 987 "Potential for temporary file vulnerability in some circumstances. Some older Unix-like systems create temp files with permission to write by all by default, so be sure to set the umask to override this. Also, some older Unix systems might fail to use O_EXCL when opening the file, so make sure that O_EXCL is used by the library (CWE-377)", 988 "", 989 "tmpfile", "avoid-race", {}), 990 991 "fopen|open": 992 (normal, 2, 993 "Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents? (CWE-362)", 994 "", 995 "misc", "", {}), 996 997 "umask": 998 (normal, 1, 999 "Ensure that umask is given most restrictive possible setting (e.g., 066 or 077) (CWE-732)", 1000 "", 1001 "access", "", {}), 1002 1003 # Windows. TODO: Detect correct usage approaches and ignore it. 1004 "GetTempFileName": 1005 (normal, 3, 1006 "Temporary file race condition in certain cases " + 1007 "(e.g., if run as SYSTEM in many versions of Windows) (CWE-377)", 1008 "", 1009 "tmpfile", "avoid-race", {}), 1010 1011 # TODO: Need to detect varying levels of danger. 1012 "execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute": 1013 (normal, 4, 1014 "This causes a new program to execute and is difficult to use safely (CWE-78)", 1015 "try using a library call that implements the same functionality " + 1016 "if available", 1017 "shell", "", {}), 1018 1019 # TODO: Need to detect varying levels of danger. 
1020 "execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute": 1021 (normal, 4, 1022 "This causes a new program to execute and is difficult to use safely (CWE-78)", 1023 "try using a library call that implements the same functionality " + 1024 "if available", 1025 "shell", "", {}), 1026 1027 # TODO: Be more specific. The biggest problem involves "first" param NULL, 1028 # second param with embedded space. Windows. 1029 "CreateProcessAsUser|CreateProcessWithLogon": 1030 (normal, 3, 1031 "This causes a new process to execute and is difficult to use safely (CWE-78)", 1032 "Especially watch out for embedded spaces", 1033 "shell", "", {}), 1034 1035 # TODO: Be more specific. The biggest problem involves "first" param NULL, 1036 # second param with embedded space. Windows. 1037 "CreateProcess": 1038 (c_hit_if_null, 3, 1039 "This causes a new process to execute and is difficult to use safely (CWE-78)", 1040 "Specify the application path in the first argument, NOT as part of the second, " + 1041 "or embedded spaces could allow an attacker to force a different program to run", 1042 "shell", "", {'check_for_null' : 1}), 1043 1044 "atoi|atol|_wtoi|_wtoi64": 1045 (normal, 2, 1046 "Unless checked, the resulting number can exceed the expected range " + 1047 "(CWE-190)", 1048 "If source untrusted, check both minimum and maximum, even if the" + 1049 " input had no minus sign (large numbers can roll over into negative" + 1050 " number; consider saving to an unsigned value if that is intended)", 1051 "integer", "dangers-c", {}), 1052 1053 # Random values. Don't trigger on "initstate", it's too common a term. 
1054 "drand48|erand48|jrand48|lcong48|lrand48|mrand48|nrand48|random|seed48|setstate|srand|strfry|srandom": 1055 (normal, 3, 1056 "This function is not sufficiently random for security-related functions such as key and nonce creation (CWE-327)", 1057 "use a more secure technique for acquiring random values", 1058 "random", "", {}), 1059 1060 "crypt": 1061 (normal, 4, 1062 "Function crypt is a poor one-way hashing algorithm; since it only accepts passwords of 8 " + 1063 "characters or less, and only a two-byte salt, it is excessively vulnerable to " + 1064 "dictionary attacks given today's faster computing equipment (CWE-327)", 1065 "Use a different algorithm, such as SHA-1, with a larger non-repeating salt", 1066 "crypto", "", {}), 1067 1068 # OpenSSL EVP calls to use DES. 1069 "EVP_des_ecb|EVP_des_cbc|EVP_des_cfb|EVP_des_ofb|EVP_desx_cbc": 1070 (normal, 4, 1071 "DES only supports a 56-bit keysize, which is too small given today's computers (CWE-327)", 1072 "Use a different patent-free encryption algorithm with a larger keysize, " + 1073 "such as 3DES or AES", 1074 "crypto", "", {}), 1075 1076 # Other OpenSSL EVP calls to use small keys. 1077 "EVP_rc4_40|EVP_rc2_40_cbc|EVP_rc2_64_cbc": 1078 (normal, 4, 1079 "These keysizes are too small given today's computers (CWE-327)", 1080 "Use a different patent-free encryption algorithm with a larger keysize, " + 1081 "such as 3DES or AES", 1082 "crypto", "", {}), 1083 1084 "chroot": 1085 (normal, 3, 1086 "chroot can be very helpful, but is hard to use correctly (CWE-250, CWE-22)", 1087 "Make sure the program immediately chdir(\"/\")," + 1088 " closes file descriptors," + 1089 " and drops root privileges, and that all necessary files" + 1090 " (and no more!) are in the new root", 1091 "misc", "", {}), 1092 1093 "getenv|curl_getenv": 1094 (normal, 3, "Environment variables are untrustable input if they can be" + 1095 " set by an attacker. 
They can have any content and" + 1096 " length, and the same variable can be set more than once (CWE-807, CWE-20)", 1097 "Check environment variables carefully before using them", 1098 "buffer", "", {'input' : 1}), 1099 1100 "g_get_home_dir": 1101 (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';" + 1102 "it returns untrustable input if the environment can be" + 1103 "set by an attacker. It can have any content and length, " + 1104 "and the same variable can be set more than once (CWE-807, CWE-20)", 1105 "Check environment variables carefully before using them", 1106 "buffer", "", {'input' : 1}), 1107 1108 "g_get_tmp_dir": 1109 (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';" + 1110 "it returns untrustable input if the environment can be" + 1111 "set by an attacker. It can have any content and length, " + 1112 "and the same variable can be set more than once (CWE-807, CWE-20)", 1113 "Check environment variables carefully before using them", 1114 "buffer", "", {'input' : 1}), 1115 1116 1117 # These are Windows-unique: 1118 1119 # TODO: Should have lower risk if the program checks return value. 
1120 "RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|" + 1121 "ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|" + 1122 "SetThreadToken": 1123 (normal, 4, "If this call fails, the program could fail to drop heightened privileges (CWE-250)", 1124 "Make sure the return value is checked, and do not continue if a failure is reported", 1125 "access", "", {}), 1126 1127 "InitializeCriticalSection": 1128 (normal, 3, "Exceptions can be thrown in low-memory situations", 1129 "Use InitializeCriticalSectionAndSpinCount instead", 1130 "misc", "", {}), 1131 1132 "EnterCriticalSection": 1133 (normal, 3, "On some versions of Windows, exceptions can be thrown in low-memory situations", 1134 "Use InitializeCriticalSectionAndSpinCount instead", 1135 "misc", "", {}), 1136 1137 "LoadLibrary|LoadLibraryEx": 1138 (normal, 3, "Ensure that the full path to the library is specified, or current directory may be used (CWE-829, CWE-20)", 1139 "Use registry entry or GetWindowsDirectory to find library path, if you aren't already", 1140 "misc", "", {'input' : 1}), 1141 1142 "SetSecurityDescriptorDacl": 1143 (c_hit_if_null, 5, 1144 "Never create NULL ACLs; an attacker can set it to Everyone (Deny All Access), " + 1145 "which would even forbid administrator access (CWE-732)", 1146 "", 1147 "misc", "", {'check_for_null' : 3}), 1148 1149 "AddAccessAllowedAce": 1150 (normal, 3, 1151 "This doesn't set the inheritance bits in the access control entry (ACE) header (CWE-732)", 1152 "Make sure that you set inheritance by hand if you wish it to inherit", 1153 "misc", "", {}), 1154 1155 "getlogin": 1156 (normal, 4, 1157 "It's often easy to fool getlogin. Sometimes it does not work at all, because some program messed up the utmp file. Often, it gives only the first 8 characters of the login name. The user currently logged in on the controlling tty of our program need not be the user who started it. 
Avoid getlogin() for security-related purposes (CWE-807)", 1158 "Use getpwuid(geteuid()) and extract the desired information instead", 1159 "misc", "", {}), 1160 1161 "cuserid": 1162 (normal, 4, 1163 "Exactly what cuserid() does is poorly defined (e.g., some systems use the effective uid, like Linux, while others like System V use the real uid). Thus, you can't trust what it does. It's certainly not portable (The cuserid function was included in the 1988 version of POSIX, but removed from the 1990 version). Also, if passed a non-null parameter, there's a risk of a buffer overflow if the passed-in buffer is not at least L_cuserid characters long (CWE-120)", 1164 "Use getpwuid(geteuid()) and extract the desired information instead", 1165 "misc", "", {}), 1166 1167 "getpw": 1168 (normal, 4, 1169 "This function is dangerous; it may overflow the provided buffer. It extracts data from a 'protected' area, but most systems have many commands to let users modify the protected area, and it's not always clear what their limits are. Best to avoid using this function altogether (CWE-676, CWE-120)", 1170 "Use getpwuid() instead", 1171 "buffer", "", {}), 1172 1173 "getpass": 1174 (normal, 4, 1175 "This function is obsolete and not portable. It was in SUSv2 but removed by POSIX.2. What it does exactly varies considerably between systems, particularly in where its prompt is displayed and where it gets its data (e.g., /dev/tty, stdin, stderr, etc.) (CWE-676)", 1176 "Make the specific calls to do exactly what you want. 
If you continue to use it, or write your own, be sure to zero the password as soon as possible to avoid leaving the cleartext password visible in the process' address space", 1177 "misc", "", {}), 1178 1179 "gsignal|ssignal": 1180 (normal, 2, 1181 "These functions are considered obsolete on most systems, and very non-poertable (Linux-based systems handle them radically different, basically if gsignal/ssignal were the same as raise/signal respectively, while System V considers them a separate set and obsolete) (CWE-676)", 1182 "Switch to raise/signal, or some other signalling approach", 1183 "obsolete", "", {}), 1184 1185 "memalign": 1186 (normal, 1, 1187 "On some systems (though not Linux-based systems) an attempt to free() results from memalign() may fail. This may, on a few systems, be exploitable. Also note that memalign() may not check that the boundary parameter is correct (CWE-676)", 1188 "Use posix_memalign instead (defined in POSIX's 1003.1d). Don't switch to valloc(); it is marked as obsolete in BSD 4.3, as legacy in SUSv2, and is no longer defined in SUSv3. In some cases, malloc()'s alignment may be sufficient", 1189 "free", "", {}), 1190 1191 "ulimit": 1192 (normal, 1, 1193 "This C routine is considered obsolete (as opposed to the shell command by the same name, which is NOT obsolete) (CWE-676)", 1194 "Use getrlimit(2), setrlimit(2), and sysconf(3) instead", 1195 "obsolete", "", {}), 1196 1197 "usleep": 1198 (normal, 1, 1199 "This C routine is considered obsolete (as opposed to the shell command by the same name). 
def find_column(text, position):
  """Return the 1-based column of text[position] within its line.

  text is the complete file contents and position is a 0-based index
  into it.  The column is counted from the last newline that occurs
  before position, or from the start of text when there is none.
  """
  # Use the str method rather than string.rfind(): the string-module
  # function was removed in Python 3, the method works on 2.x and 3.x.
  newline = text.rfind("\n", 0, position)
  if newline == -1:
    return position + 1
  else:
    return position - newline

def get_context(text, position):
  """Return the line of text surrounding text[position], without its newline.

  The search for the start of the line includes text[position] itself, so
  if position points AT a newline the result is the empty string.
  """
  linestart = text.rfind("\n", 0, position+1) + 1
  lineend = text.find("\n", position, len(text))
  if lineend == -1: lineend = len(text)   # Last line has no newline.
  return text[linestart:lineend]

def c_valid_match(text, position):
  """Decide whether the word that ends just before text[position] is a hit.

  Returns 1 if it should be treated as a use of the named function,
  0 if it is presumed to be a false positive.  Whitespace after the word
  is skipped; the first other character decides:
    "("       -> always a match.
    "=+-"     -> never a match (see the rationale below).
    otherwise -> a match unless the global "falsepositive" option is set,
                 in which case anything not followed by "(" is rejected.
  If only whitespace (or nothing) follows the word, the result is 0.
  """
  i = position
  while i < len(text):
    c = text[i]
    if c == '(': return 1
    elif c in string.whitespace: i = i + 1
    else:
      if falsepositive: return 0   # No following "(", presume invalid.
      if c in "=+-":
        # This is very unlikely to be a function use.  If c is '=',
        # the name is followed by an assignment or is-equal operation.
        # Since the names of library functions are really unlikely to be
        # followed by an assignment statement or 'is-equal' test,
        # while this IS common for variable names, let's declare it invalid.
        # It's possible that this is a variable function pointer, pointing
        # to the real library function, but that's really improbable.
        # If c is "+" or "-", we have a + or - operation.
        # In theory "-" could be used for a function pointer difference
        # computation, but this is extremely improbable.
        # More likely: this is a variable in a computation, so drop it.
        return 0
      return 1
  return 0   # Never found anything other than "(" and whitespace.
def process_directive():
  """Apply an "ignore" directive found at the current scan position.

  Reads the module globals filename and linenumber (the position of the
  directive), never_ignore, and hitlist.  Every hit already recorded for
  the current file and line is removed from hitlist and counted in
  num_ignored_hits.  If there was no such hit, ignoreline is set to the
  NEXT line number instead.  Does nothing when never_ignore is set.
  """
  global ignoreline, num_ignored_hits
  # TODO: Currently this is just a stub routine that simply removes
  # hits from the current line, if any, and sets a flag if not.
  # Thus, any directive is considered the "ignore" directive.
  # Currently that's okay because we don't have any other directives yet.
  if never_ignore: return
  hitfound = 0
  # Walk the indexes from last to first so that deleting an entry never
  # shifts an entry we have not examined yet.  range() is used instead of
  # xrange(), which does not exist in Python 3.
  for i in range(len(hitlist)-1, -1, -1):
    if hitlist[i].filename == filename and hitlist[i].line == linenumber:
      del hitlist[i]
      hitfound = 1   # Don't break, because there may be more than one.
      num_ignored_hits = num_ignored_hits + 1
  if not hitfound:
    ignoreline = linenumber + 1   # Nothing found - ignore next line.
# Characters that can appear in a numeric literal:
# 0x4, 4.4e4, etc.
numberset=string.hexdigits+"_x.Ee"

# Patterns for various circumstances:
p_whitespace = re.compile( r'[ \t\v\f]+' )
p_include = re.compile( r'#\s*include\s+(<.*?>|".*?")' )
p_digits = re.compile( r'[0-9]' )
p_alphaunder = re.compile( r'[A-Za-z_]' )  # Alpha chars and underline.
# A "word" in C.  Note that "$" is permitted -- it's not permitted by the
# C standard in identifiers, but gcc supports it as an extension.
p_c_word = re.compile( r'[A-Za-z_][A-Za-z_0-9$]*' )
# We'll recognize ITS4 and RATS ignore directives, as well as our own,
# for compatibility's sake:
p_directive = re.compile( r'(?i)\s*(ITS4|Flawfinder|RATS):\s*([^\*]*)' )

max_lookahead=500  # Lookahead limit for c_static_array.

def process_c_file(f, patch_infos):
  """Scan one C/C++ source file and run the rule hook for each match.

  f is a filename, or "-" to read standard input.
  patch_infos is None, or a mapping from filename to a container of line
  numbers; when it is given, a file that is not a key is skipped, and
  only words found on the listed lines produce hits.

  Words inside comments, string/character literals and #include lines are
  not examined.  For every other word that is a key of c_ruleset and
  passes c_valid_match(), a Hit is built and its hook is called.

  Side effects: sets the globals filename, linenumber and ignoreline,
  adds to sumlines (newlines seen) and sloc (lines containing code), and
  may add to num_links_skipped.  Calls error() if the file ends inside a
  comment or string, and exits with status 1 if the file cannot be opened.
  """
  global filename, linenumber, ignoreline, sumlines, num_links_skipped
  global sloc
  filename=f
  linenumber = 1
  ignoreline = -1

  incomment = 0
  instring = 0    # 1 inside "...", 2 inside '...'
  linebegin = 1
  codeinline = 0  # 1 when we see some code (so increment sloc at newline)

  if ((patch_infos != None) and (not (f in patch_infos))):
    # This file isn't in the patch list, so don't bother analyzing it.
    if not quiet:
      # Each message is built as ONE string so that print(...) behaves the
      # same as a statement (Python 2) and as a function (Python 3).
      if output_format:
        print("Skipping unpatched file  %s <br>" % h(f))
      else:
        print("Skipping unpatched file %s" % f)
      sys.stdout.flush()
    return

  if f == "-":
    source = sys.stdin
  else:
    # Symlinks should never get here, but just in case...
    if ((not allowlink) and os.path.islink(f)):
      print("BUG! Somehow got a symlink in process_c_file!")
      num_links_skipped = num_links_skipped + 1
      return
    try:
      source = open(f, "r")
    except (IOError, OSError):
      # Only trap I/O failures; a bare "except" would also swallow
      # KeyboardInterrupt and SystemExit.
      print("Error: failed to open %s" % h(f))
      sys.exit(1)

  # Read ENTIRE file into memory.  Use readlines() to convert \n if necessary.
  # This turns out to be very fast in Python, even on large files, and it
  # eliminates lots of range checking later, making the result faster.
  # We're examining source files, and today, it would be EXTREMELY bad practice
  # to create source files larger than main memory space.
  # Better to load it all in, and get the increased speed and reduced
  # development time that results.

  if not quiet:
    if output_format:
      print("Examining %s <br>" % h(f))
    else:
      print("Examining %s" % f)
    sys.stdout.flush()

  try:
    text = "".join(source.readlines())
  finally:
    # Release the handle we opened ourselves; never close stdin.
    if source is not sys.stdin: source.close()

  i = 0
  while i < len(text):
    # This is a trivial tokenizer that just tries to find "words", which
    # match [A-Za-z_][A-Za-z0-9_]*.  It skips comments & strings.
    # It also skips "#include <...>", which must be handled specially
    # because "<" and ">" aren't usually delimiters.
    # It doesn't bother to tokenize anything else, since it's not used.
    # The following is a state machine with 3 states: incomment, instring,
    # and "normal", and a separate state "linebegin" if at BOL.

    # Skip any whitespace
    m = p_whitespace.match(text,i)
    if m:
      i = m.end(0)

    if i >= len(text):
      c = "\n"  # Last line with no newline, we're done
    else:
      c = text[i]
    if linebegin:  # If at beginning of line, see if #include is there.
      linebegin = 0
      if c == "#": codeinline = 1  # A directive, count as code.
      m = p_include.match(text,i)
      if m:  # Found #include, skip it.  Otherwise: #include <stdio.h>
        i = m.end(0)
        continue
    if c == "\n":
      linenumber = linenumber + 1
      sumlines = sumlines + 1
      linebegin = 1
      if codeinline: sloc = sloc + 1
      codeinline = 0
      i = i +1
      continue
    i = i + 1   # From here on, text[i] points to next character.
    if i < len(text): nextc = text[i]
    else:             nextc = ''
    if incomment:
      if c=='*' and nextc=='/':
        i = i + 1
        incomment = 0
    elif instring:
      if c == '\\' and (nextc != "\n"): i = i + 1   # Skip escaped char.
      elif c == '"' and instring == 1: instring = 0
      elif c == "'" and instring == 2: instring = 0
    else:
      if c=='/' and nextc=='*':
        m = p_directive.match(text, i+1)  # Is there a directive here?
        if m:
          process_directive()
        i = i + 1
        incomment = 1
      elif c=='/' and nextc=='/':  # "//" comments - skip to EOL.
        m = p_directive.match(text, i+1)  # Is there a directive here?
        if m:
          process_directive()
        while i<len(text) and text[i] != "\n":
          i = i + 1
      elif c=='"':
        instring = 1
        codeinline = 1
      elif c=="'":
        instring = 2
        codeinline = 1
      else:
        codeinline = 1  # It's not whitespace, comment, or string.
        m = p_c_word.match(text, i-1)
        if m:                        # Do we have a word?
          startpos=i-1
          endpos = m.end(0)
          i = endpos
          word = text[startpos:endpos]
          # print "Word is:", text[startpos:endpos]
          if (word in c_ruleset) and c_valid_match(text, endpos):
            if ( (patch_infos == None) or ((patch_infos != None) and
                 (linenumber in patch_infos[f]))):
              # FOUND A MATCH, setup & call hook.
              # print "HIT: #%s#\n" % word
              # Don't use the tuple assignment form, e.g., a,b=c,d
              # because Python (least 2.2.2) does that slower
              # (presumably because it creates & destroys temporary tuples)
              hit = Hit(c_ruleset[word])
              hit.name = word
              hit.start = startpos
              hit.end = endpos
              hit.line = linenumber
              hit.column = find_column(text, startpos)
              hit.filename=filename
              hit.context_text = get_context(text, startpos)
              hit.parameters = extract_c_parameters(text, endpos)
              if hit.extract_lookahead:
                hit.lookahead = text[startpos:startpos+max_lookahead]
              hit.hook(hit)
        elif p_digits.match(c):
          while i<len(text) and p_digits.match(text[i]): # Process a number.
            i = i + 1
        # else some other character, which we ignore.
  # End of loop through text. Wrap up.
  if codeinline: sloc = sloc + 1
  if incomment: error("File ended while in comment.")
  if instring: error("File ended while in string.")
def expand_ruleset(ruleset):
  """Expand compressed rules in ruleset, in place.

  A key may name several functions separated by "|" (e.g. "gets|_getts").
  Each such key is replaced by one key per function name, all mapped to
  the original key's value.  If an expanded name is already a key, an
  error is printed and the program exits with status 1.
  """
  # The loop adds and deletes keys, so iterate over a snapshot of the keys.
  # (In Python 3, keys() is a live view and changing the dict while
  # iterating over it raises RuntimeError.)
  for rule in list(ruleset.keys()):
    if "|" in rule:  # We found a rule to expand.
      for newrule in rule.split("|"):
        if newrule in ruleset:
          print("Error: Rule %s, when expanded, overlaps %s" % ( rule, newrule ))
          sys.exit(1)
        ruleset[newrule] = ruleset[rule]
      del ruleset[rule]
  # To print out the set of keys in the expanded ruleset, run:
  #   print(repr(list(ruleset.keys())))
def display_ruleset(ruleset):
  """Print one line per rule: name, default level, default warning.

  Fields are tab-separated and rules are listed in sorted name order.
  Each ruleset value is indexed as value[1] (level) and value[2] (warning).
  """
  # Copy the keys into a real list before sorting; in Python 3 keys()
  # returns a view object that has no sort() method.
  sortedkeys = list(ruleset.keys())
  sortedkeys.sort()
  # Now, print them out:
  for key in sortedkeys:
    print(key + "\t" + str(ruleset[key][1]) + "\t" + ruleset[key][2])

def initialize_ruleset():
  """Expand c_ruleset and optionally report on it.

  Prints the rule count when showheading is set (followed by "<p>" when
  output_format is set).  When list_rules is set, lists the rules and
  exits with status 0.
  """
  expand_ruleset(c_ruleset)
  if showheading:
    # Built as a single string so print(...) works as both the Python 2
    # statement and the Python 3 function.
    print("Number of rules (primarily dangerous function names) in C/C++ ruleset: %d" % len(c_ruleset))
    if output_format: print("<p>")
  if list_rules:
    display_ruleset(c_ruleset)
    sys.exit(0)


# Show the header, but only if it hasn't been shown yet.
def display_header():
  """Print the report header once (HTML preamble or plain banner).

  Does nothing when showheading is false or when displayed_header is
  already set; sets displayed_header after printing.
  """
  global displayed_header
  if not showheading: return
  if not displayed_header:
    if output_format:
      print('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" ' +
            '"http://www.w3.org/TR/html4/loose.dtd">')
      print("<html>")
      print("<head>")
      print('<meta http-equiv="Content-type" content="text/html; charset=utf8">')
      print("<title>Flawfinder Results</title>")
      print('<meta name="author" content="David A. Wheeler">')
      print('<meta name="keywords" lang="en" content="flawfinder results, security scan">')
      print("</head>")
      print("<body>")
      print("<h1>Flawfinder Results</h1>")
      print("Here are the security scan results from")
      print('<a href="http://www.dwheeler.com/flawfinder">Flawfinder version %s</a>,' % version)
      print('(C) 2001-2014 <a href="http://www.dwheeler.com">David A. Wheeler</a>.')
    else:
      print("Flawfinder version %s, (C) 2001-2014 David A. Wheeler." % version)
    displayed_header = 1
c_extensions = { '.c' : 1, '.h' : 1,
                 '.ec': 1, '.ecp': 1,  # Informix embedded C.
                 '.pgc': 1,            # Postgres embedded C.
                 '.C': 1, '.cpp': 1, '.CPP': 1, '.cxx': 1, '.cc': 1, # C++
                 '.CC' : 1, '.c++' :1,  # C++.
                 '.pcc': 1,            # Oracle C++
                 '.hpp': 1, '.H' : 1,  # .h - usually C++.
               }


def maybe_process_file(f, patch_infos):
  """Process path f found while walking a directory tree.

  A directory is recursed into, unless it is a symbolic link (and
  allowlink is off) or its name starts with "." (and skipdotdir is on).
  Anything else is passed to process_c_file() only if its name ends in
  one of c_extensions, it is a regular file (symbolic links are accepted
  only when allowlink is on), and patch_infos is None or has f as a key.

  Adds to the globals num_links_skipped and num_dotdirs_skipped, and
  reports skipped items through print_warning() unless quiet is set.
  """
  global num_links_skipped, num_dotdirs_skipped
  if os.path.isdir(f):
    if (not allowlink) and os.path.islink(f):
      if not quiet: print_warning("Skipping symbolic link directory " + h(f))
      num_links_skipped = num_links_skipped + 1
      return
    base_filename = os.path.basename(f)
    if (skipdotdir and len(base_filename) > 1 and ("." == base_filename[0])):
      if not quiet: print_warning("Skipping directory with initial dot " + h(f))
      num_dotdirs_skipped = num_dotdirs_skipped + 1
      return
    for entry in os.listdir(f):
      maybe_process_file(os.path.join(f, entry), patch_infos)
    # A directory is fully handled by the recursion above.  Without this
    # return, a directory whose name looks like a source file ("foo.c")
    # would fall into the file checks below and draw a bogus
    # "Skipping non-regular file" warning.
    return
  # Now we will FIRST check if the file appears to be a C/C++ file, and
  # THEN check if it's a regular file or symlink.  This is more complicated,
  # but I do it this way so that there won't be a lot of pointless
  # warnings about skipping files we wouldn't have used anyway.
  dotposition = f.rfind(".")   # str method; string.rfind() is gone in 3.x.
  if dotposition > 1:
    extension = f[dotposition:]
    if extension in c_extensions:
      # Its name appears to be a C/C++ source code file.
      if (not allowlink) and os.path.islink(f):
        if not quiet: print_warning("Skipping symbolic link file " + h(f))
        num_links_skipped = num_links_skipped + 1
      elif not os.path.isfile(f):
        # Skip anything not a normal file.  This is so that
        # device files, etc. won't cause trouble.
        if not quiet: print_warning("Skipping non-regular file " + h(f))
      else:
        # We want to know the difference only with files found in the patch.
        if ((patch_infos == None) or (patch_infos != None and
            (f in patch_infos))):
          process_c_file(f, patch_infos)
def process_file_args(files, patch_infos):
  """Process the files and directories named on the command line.

  files is the list of command-line names; "-" means standard input.
  patch_infos is None, or a mapping keyed by filename; when it is given,
  only regular files that are keys of it are analyzed.
  """
  # This is handled differently than anything not found on the command line
  # (i.e. through recursing into a directory) because flawfinder
  # ALWAYS processes normal files given on the command line.
  # This is done to give users control over what's processed;
  # if a user really, really wants to analyze a file, name it!
  # If user wants to process "this directory and down", just say ".".
  # We handle symlinks specially, handle normal files and directories,
  # and skip the rest to prevent security problems. "-" is stdin.
  global num_links_skipped
  for f in files:
    if (not allowlink) and os.path.islink(f):
      if not quiet: print_warning("Skipping symbolic link " + h(f))
      num_links_skipped = num_links_skipped + 1
    elif os.path.isfile(f) or f == "-":
      # If on the command line, FORCE processing of it.
      # Currently, we only process C/C++.
      # check if we only want to review a patch
      # (This test used the undefined name "k" instead of "f", which
      # raised NameError whenever a patch was supplied.)
      if (patch_infos is None) or (f in patch_infos):
        process_c_file(f, patch_infos)
    elif os.path.isdir(f):
      # At one time flawfinder used os.path.walk, but that Python
      # built-in doesn't give us enough control over symbolic links.
      # So, we'll walk the filesystem hierarchy ourselves:
      maybe_process_file(f, patch_infos)
    elif not os.path.exists(f):
      if not quiet:
        # "\342\210\222" is the UTF-8 byte sequence of U+2212 (MINUS SIGN),
        # which a word processor may substitute for an option's "-".
        if h(f).startswith("\342\210\222"):
          print_warning("Skipping non-existent filename starting with UTF-8 long dash " + h(f))
        else:
          print_warning("Skipping non-existent file " + h(f))
    else:
      if not quiet: print_warning("Skipping non-regular file " + h(f))
1648 Currently, this means that function names are ignored if 1649 they're not followed by "(", and that declarations of char- 1650 acter arrays aren't noted. Thus, if you have use a vari- 1651 able named "access" everywhere, this will eliminate refer- 1652 ences to this ordinary variable. This isn't the default, 1653 because this also increases the likelihood of missing 1654 important hits; in particular, function names in #define 1655 clauses and calls through function pointers will be missed. 1656 --neverignore | -n 1657 Never ignore security issues, even if they have an ``ignore'' 1658 directive in a comment. 1659 --regex PATTERN | -e PATTERN 1660 Only report hits that match the regular expression PATTERN. 1661 1662 Selecting Output Format: 1663 --columns | -C 1664 Show the column number (as well as the file name and 1665 line number) of each hit; this is shown after the line number 1666 by adding a colon and the column number in the line (the first 1667 character in a line is column number 1). 1668 --context | -c 1669 Show context (the line having the "hit"/potential flaw) 1670 --dataonly | -D 1671 Don't display the headers and footers of the analysis; 1672 use this along with --quiet to get just the results. 1673 --html | -H 1674 Display as HTML output. 1675 --immediate | -i 1676 Immediately display hits (don't just wait until the end). 1677 --singleline | -S 1678 Single-line output. 1679 --omittime Omit time to run. 1680 --quiet | -Q 1681 Don't display status information (i.e., which files are being 1682 examined) while the analysis is going on. 1683 1684 Hitlist Management: 1685 --savehitlist=F 1686 Save all hits (the "hitlist") to F. 1687 --loadhitlist=F 1688 Load hits from F instead of analyzing source programs. 1689 --diffhitlist=F 1690 Show only hits (loaded or analyzed) not in F. 1691 1692 1693 For more information, please consult the manpage or available 1694 documentation. 
1695""" 1696 1697def process_options(): 1698 global show_context, show_inputs, allowlink, skipdotdir, omit_time 1699 global output_format, minimum_level, show_immediately, single_line 1700 global required_regex, required_regex_compiled 1701 global falsepositive 1702 global show_columns, never_ignore, quiet, showheading, list_rules 1703 global loadhitlist, savehitlist, diffhitlist 1704 global patch_file 1705 try: 1706 # Note - as a side-effect, this sets sys.argv[]. 1707 optlist, args = getopt.getopt(sys.argv[1:], "ce:m:nih?CSDQHIFP:", 1708 ["context", "minlevel=", "immediate", "inputs", "input", 1709 "nolink", "falsepositive", "falsepositives", 1710 "columns", "listrules", "omittime", "allowlink", "patch=", 1711 "followdotdir", 1712 "neverignore", "regex=", 1713 "quiet", "dataonly", "html", "singleline", 1714 "loadhitlist=", "savehitlist=", "diffhitlist=", 1715 "version", "help" ]) 1716 for (opt,value) in optlist: 1717 if opt == "--context" or opt == "-c": 1718 show_context = 1 1719 elif opt == "--columns" or opt == "-C": 1720 show_columns = 1 1721 elif opt == "--quiet" or opt == "-Q": 1722 quiet = 1 1723 elif opt == "--dataonly" or opt == "-D": 1724 showheading = 0 1725 elif opt == "--inputs" or opt == "--input" or opt == "-I": 1726 show_inputs = 1 1727 minimum_level = 0 1728 elif opt == "--falsepositive" or opt == "falsepositives" or opt == "-F": 1729 falsepositive = 1 1730 elif opt == "--nolink": 1731 allowlink = 0 1732 elif opt == "--omittime": 1733 omit_time = 1 1734 elif opt == "--allowlink": 1735 allowlink = 1 1736 elif opt == "--followdotdir": 1737 skipdotdir = 0 1738 elif opt == "--listrules": 1739 list_rules = 1 1740 elif opt == "--html" or opt == "-H": 1741 output_format = 1 1742 single_line = 0 1743 elif opt == "--minlevel" or opt == "-m": 1744 minimum_level = string.atoi(value) 1745 elif opt == "--singleline" or opt == "-S": 1746 single_line = 1 1747 elif opt == "--immediate" or opt == "-i": 1748 show_immediately = 1 1749 elif opt == "-n" or opt == 
"--neverignore": 1750 never_ignore = 1 1751 elif opt == "-e" or opt == "--regex": 1752 required_regex = value 1753 # This will raise an exception if it can't be compiled as a regex: 1754 required_regex_compiled = re.compile(required_regex) 1755 elif opt == "-P" or opt == "--patch": 1756 # Note: This is -P, so that a future -p1 option can strip away 1757 # pathname prefixes (with the same option name as "patch"). 1758 patch_file = value 1759 # If we consider ignore comments we may change a line which was 1760 # previously ignored but which will raise now a valid warning without 1761 # noticing it now. So, set never_ignore. 1762 never_ignore = 1 1763 elif opt == "--loadhitlist": 1764 loadhitlist = value 1765 display_header() 1766 if showheading: print "Loading hits from", value 1767 elif opt == "--savehitlist": 1768 savehitlist = value 1769 display_header() 1770 if showheading: print "Saving hitlist to", value 1771 elif opt == "--diffhitlist": 1772 diffhitlist = value 1773 display_header() 1774 if showheading: print "Showing hits not in", value 1775 elif opt == "--version": 1776 print version 1777 sys.exit(0) 1778 elif opt in [ '-h', '-?', '--help' ]: 1779 # We accept "-?" but do not document it. On Unix-like systems the 1780 # question mark in "-?" should be escaped, and many forget that. 1781 usage() 1782 sys.exit(0) 1783 # For DOS/Windows, expand filenames; for Unix, DON'T expand them 1784 # (the shell will expand them for us). Some sloppy Python programs 1785 # always call "glob", but that's WRONG -- on Unix-like systems that 1786 # will expand twice. 
Python doesn't have a clean way to detect 1787 # "has globbing occurred", so this is the best I've found: 1788 if os.name == "windows" or os.name == "nt" or os.name == "dos": 1789 sys.argv[1:] = reduce(operator.add, map(glob.glob, args)) 1790 else: 1791 sys.argv[1:] = args 1792 # In Python 2 the convention is "getopt.GetoptError", but we 1793 # use "getopt.error" here so it's compatible with both 1794 # Python 1.5 and Python 2. 1795 except getopt.error, text: 1796 print "*** getopt error:", text 1797 usage() 1798 sys.exit(1) 1799 1800 1801 1802def process_files(): 1803 global hitlist 1804 if loadhitlist: 1805 f = open(loadhitlist) 1806 hitlist = pickle.load(f) 1807 else: 1808 patch_infos = None 1809 if (patch_file != ""): 1810 patch_infos = load_patch_info(patch_file) 1811 files = sys.argv[1:] 1812 if not files: 1813 print "*** No input files" 1814 return None 1815 process_file_args(files, patch_infos) 1816 return 1 1817 1818 1819def show_final_results(): 1820 global hitlist 1821 count = 0 1822 count_per_level = {} 1823 count_per_level_and_up = {} 1824 for i in range(0,6): # Initialize count_per_level 1825 count_per_level[i] = 0 1826 for i in range(0,6): # Initialize count_per_level 1827 count_per_level_and_up[i] = 0 1828 if show_immediately or not quiet: # Separate the final results. 1829 print 1830 if showheading: 1831 if output_format: 1832 print "<h2>Final Results</h2>" 1833 else: 1834 print "FINAL RESULTS:" 1835 print 1836 hitlist.sort() 1837 # Display results. The HTML format now uses 1838 # <ul> so that the format differentiates each entry. 1839 # I'm not using <ol>, because its numbers might be confused with 1840 # the risk levels or line numbers. 
1841 if diffhitlist: 1842 diff_file = open(diffhitlist) 1843 diff_hitlist = pickle.load(diff_file) 1844 if output_format: print "<ul>" 1845 for h in hitlist: 1846 if h not in diff_hitlist: 1847 h.show() 1848 count_per_level[h.level] = count_per_level[h.level] + 1 1849 count = count + 1 1850 if output_format: print "</ul>" 1851 diff_file.close() 1852 else: 1853 if output_format: print "<ul>" 1854 for h in hitlist: 1855 h.show() 1856 count_per_level[h.level] = count_per_level[h.level] + 1 1857 if output_format: print "</ul>" 1858 count = len(hitlist) 1859 # Done with list, show the post-hitlist summary. 1860 if showheading: 1861 if output_format: 1862 print "<h2>Analysis Summary</h2>" 1863 else: 1864 print 1865 print "ANALYSIS SUMMARY:" 1866 if output_format: 1867 print "<p>" 1868 else: 1869 print 1870 if count > 0: 1871 print "Hits =", count 1872 else: 1873 print "No hits found." 1874 if output_format: 1875 print "<br>" 1876 # Compute the amount of time spent, and lines analyzed/second. 1877 # By computing time here, we also include the time for 1878 # producing the list of hits, which is reasonable. 1879 time_analyzing = time.time() - starttime 1880 if required_regex: 1881 print "Hits limited to regular expression " + required_regex 1882 print "Lines analyzed = %d" % sumlines, 1883 if time_analyzing > 0 and not omit_time: # Avoid divide-by-zero. 1884 print "in approximately %.2f seconds (%.0f lines/second)" % ( 1885 time_analyzing, 1886 (sumlines / time_analyzing) ) 1887 else: 1888 print 1889 if output_format: print "<br>" 1890 print "Physical Source Lines of Code (SLOC) = %d" % sloc 1891 if output_format: print "<br>" 1892 # Output hits@each level. 
1893 print "Hits@level =", 1894 for i in range(0,6): 1895 print "[%d] %3d" % (i, count_per_level[i]), 1896 if output_format: 1897 print "<br>" 1898 else: 1899 print 1900 # Compute hits at "level x or higher" 1901 print "Hits@level+ =", 1902 for i in range(0,6): 1903 for j in range(i,6): 1904 count_per_level_and_up[i] = count_per_level_and_up[i] + count_per_level[j] 1905 # Display hits at "level x or higher" 1906 for i in range(0,6): 1907 print "[%d+] %3d" % (i, count_per_level_and_up[i]), 1908 if output_format: 1909 print "<br>" 1910 else: 1911 print 1912 if (sloc > 0): 1913 print "Hits/KSLOC@level+ =", 1914 for i in range(0,6): 1915 print "[%d+] %3g" % (i, count_per_level_and_up[i]*1000.0/sloc), 1916 if output_format: 1917 print "<br>" 1918 else: 1919 print 1920 # 1921 if num_links_skipped: 1922 print "Symlinks skipped =", num_links_skipped, "(--allowlink overrides but see doc for security issue)" 1923 if output_format: 1924 print "<br>" 1925 if num_dotdirs_skipped: 1926 print "Dot directories skipped =", num_dotdirs_skipped, "(--followdotdir overrides)" 1927 if output_format: 1928 print "<br>" 1929 if num_ignored_hits > 0: 1930 print "Suppressed hits =", num_ignored_hits, "(use --neverignore to show them)" 1931 if output_format: 1932 print "<br>" 1933 print "Minimum risk level = %d" % minimum_level 1934 if output_format: print "<br>" 1935 if count > 0: 1936 print "Not every hit is necessarily a security vulnerability." 1937 if output_format: 1938 print "<br>" 1939 print "There may be other security vulnerabilities; review your code!" 1940 if output_format: 1941 print "<br>" 1942 print "See '<a href=\"http://www.dwheeler.com/secure-programs\">Secure Programming for Linux and Unix HOWTO</a>'" 1943 print "(<a href=\"http://www.dwheeler.com/secure-programs\">http://www.dwheeler.com/secure-programs</a>) for more information." 
1944 else: 1945 print "See 'Secure Programming for Linux and Unix HOWTO'" 1946 print "(http://www.dwheeler.com/secure-programs) for more information." 1947 if output_format: 1948 print "</body>" 1949 print "</html>" 1950 1951 1952def save_if_desired(): 1953 # We'll save entire hitlist, even if only differences displayed. 1954 if savehitlist: 1955 print "Saving hitlist to", savehitlist 1956 f = open(savehitlist, "w") 1957 pickle.dump(hitlist, f) 1958 f.close() 1959 1960def flawfind(): 1961 process_options() 1962 display_header() 1963 initialize_ruleset() 1964 if process_files(): 1965 show_final_results() 1966 save_if_desired() 1967 1968if __name__ == '__main__': 1969 try: 1970 flawfind() 1971 except KeyboardInterrupt: 1972 print "*** Flawfinder interrupted" 1973 1974