#!/usr/bin/env python

"""
Copyright (c) 2006-2019 sqlmap developers (http://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""

import codecs
import gzip
import io
import logging
import re
import struct
import zlib

from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import isListLike
from lib.core.common import randomStr
from lib.core.common import readInput
from lib.core.common import resetCookieJar
from lib.core.common import singleTimeLogMessage
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.convert import decodeHex
from lib.core.convert import getBytes
from lib.core.convert import getText
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import cachedmethod
from lib.core.decorators import lockedmethod
from lib.core.dicts import HTML_ENTITIES
from lib.core.enums import DBMS
from lib.core.enums import HTTP_HEADER
from lib.core.enums import PLACE
from lib.core.exception import SqlmapCompressionException
from lib.core.settings import BLOCKED_IP_REGEX
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import EVENTVALIDATION_REGEX
from lib.core.settings import IDENTYWAF_PARSE_LIMIT
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.identywaf import identYwaf
from thirdparty.odict import OrderedDict
from thirdparty.six import unichr as _unichr
from thirdparty.six.moves import http_client as _http_client

@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    items = items or {}

    for _ in list(items.keys()):
        if items[_] is None:
            del items[_]

    headers = OrderedDict(conf.httpHeaders if base is None else base)
    headers.update(items.items())

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = OrderedDict()
    for key, value in _.items():
        success = False

        for _ in headers:
            if _.upper() == key.upper():
                del headers[_]
                break

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:  # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True
        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                if cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "target URL provides its own cookies within "
                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers

def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page if not status else "%s\n\n%s" % (status, page))

@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot;", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except:
        encoding = None

    if encoding:
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
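
# Illustrative results of checkCharEncoding() above (not part of the original module;
# values derived from the repair/translation rules):
#
#   checkCharEncoding("x-euc-kr", False)    # -> 'euc_kr'       ("x-" prefix stripped, "euc-" -> "euc_")
#   checkCharEncoding("windows874", False)  # -> 'iso-8859-11'  ("windows874" -> "windows-874" -> translate table)
#   checkCharEncoding("gb2313", False)      # -> 'gb2312'       (common typo repaired)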

def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = hash(page)
    retVal = kb.cache.encoding.get(key) or detect(page)["encoding"]
    kb.cache.encoding[key] = retVal

    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
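
# Illustrative note (not part of the original module): getHeuristicCharEncoding() memoizes
# chardet results in kb.cache.encoding, keyed by hash(page), so identical response bodies
# trigger only one detection pass:
#
#   getHeuristicCharEncoding(b"<html></html>")  # first call runs detect() -> 'ascii'
#   getHeuristicCharEncoding(b"<html></html>")  # second call is served from kb.cache.encoding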

def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if hasattr(contentEncoding, "lower"):
        contentEncoding = contentEncoding.lower()
    else:
        contentEncoding = ""

    if hasattr(contentType, "lower"):
        contentType = contentType.lower()
    else:
        contentType = ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                size = struct.unpack("<l", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            if "<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-fA-F]{2})", lambda _: decodeHex(_.group(1)), page)

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith("\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page
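
# Illustrative sketch (not part of the original module): once the page is unicode, the
# numeric-entity pass inside decodePage() behaves like
#
#   re.sub(r"&#(\d+);", lambda m: _unichr(int(m.group(1))), u"&#8217;foo&#8482;")
#   # -> u'\u2019foo\u2122' (a right single quotation mark and a trademark sign)
#
# while named entities below 256 (e.g. &amp;) were already handled on the byte level above.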

def processResponse(page, responseHeaders, code=None, status=None):
    kb.processResponseCounter += 1

    page = page or ""

    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", getUnicode("".join(responseHeaders.headers if responseHeaders else [])), page)

        identYwaf.non_blind.clear()
        if identYwaf.non_blind_check(rawResponse, silent=True):
            for waf in identYwaf.non_blind:
                if waf not in kb.identifiedWafs:
                    kb.identifiedWafs.add(waf)
                    errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                    singleTimeLogMessage(errMsg, logging.CRITICAL)

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                        conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)
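
# Rough call-flow sketch (not part of the original module; names and ordering are an
# assumption about the calling request layer, not confirmed here):
#
#   headers = forgeHeaders({HTTP_HEADER.COOKIE: cookie})        # before sending the request
#   page = decodePage(rawBody, contentEncoding, contentType)    # right after the response arrives
#   processResponse(page, responseHeaders, code, status)        # fingerprint/WAF/CAPTCHA checks
#
# where cookie, rawBody, contentEncoding, contentType, responseHeaders, code and status
# are hypothetical request/response values.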