/* p0f - HTTP fingerprinting ------------------------- Copyright (C) 2012 by Michal Zalewski Distributed under the terms and conditions of GNU LGPL. */ #define _FROM_FP_HTTP #define _GNU_SOURCE #include #include #include #include #include #include #include #include "types.h" #include "config.h" #include "debug.h" #include "alloc-inl.h" #include "process.h" #include "readfp.h" #include "p0f.h" #include "tcp.h" #include "hash.h" #include "fp_http.h" #include "languages.h" static u8** hdr_names; /* List of header names by ID */ static u32 hdr_cnt; /* Number of headers registered */ static u32* hdr_by_hash[SIG_BUCKETS]; /* Hashed header names */ static u32 hbh_cnt[SIG_BUCKETS]; /* Number of headers in bucket */ /* Signatures aren't bucketed due to the complex matching used; but we use Bloom filters to go through them quickly. */ static struct http_sig_record* sigs[2]; static u32 sig_cnt[2]; static struct ua_map_record* ua_map; /* Mappings between U-A and OS */ static u32 ua_map_cnt; #define SLOF(_str) (u8*)_str, strlen((char*)_str) /* Ghetto Bloom filter 4-out-of-64 bitmask generator for adding 32-bit header IDs to a set. We expect around 10 members in a set. */ static inline u64 bloom4_64(u32 val) { u32 hash = hash32(&val, 4, hash_seed); u64 ret; ret = (1LL << (hash & 63)); ret ^= (1LL << ((hash >> 8) & 63)); ret ^= (1LL << ((hash >> 16) & 63)); ret ^= (1LL << ((hash >> 24) & 63)); return ret; } /* Look up or register new header */ static s32 lookup_hdr(u8* name, u32 len, u8 create) { u32 bucket = hash32(name, len, hash_seed) % SIG_BUCKETS; u32* p = hdr_by_hash[bucket]; u32 i = hbh_cnt[bucket]; while (i--) { if (!memcmp(hdr_names[*p], name, len) /* ASAN won't like this... */ && !hdr_names[*p][len]) return *p; p++; } /* Not found! */ if (!create) return -1; hdr_names = DFL_ck_realloc(hdr_names, (hdr_cnt + 1) * sizeof(u8*)); hdr_names[hdr_cnt] = DFL_ck_memdup_str(name, len); hdr_by_hash[bucket] = DFL_ck_realloc(hdr_by_hash[bucket], (hbh_cnt[bucket] + 1) * 4); hdr_by_hash[bucket][hbh_cnt[bucket]++] = hdr_cnt++; return hdr_cnt - 1; } /* Pre-register essential headers. */ void http_init(void) { u32 i; /* Do not change - other code depends on the ordering of first 6 entries. */ lookup_hdr(SLOF("User-Agent"), 1); /* 0 */ lookup_hdr(SLOF("Server"), 1); /* 1 */ lookup_hdr(SLOF("Accept-Language"), 1); /* 2 */ lookup_hdr(SLOF("Via"), 1); /* 3 */ lookup_hdr(SLOF("X-Forwarded-For"), 1); /* 4 */ lookup_hdr(SLOF("Date"), 1); /* 5 */ #define HDR_UA 0 #define HDR_SRV 1 #define HDR_AL 2 #define HDR_VIA 3 #define HDR_XFF 4 #define HDR_DAT 5 i = 0; while (req_optional[i].name) { req_optional[i].id = lookup_hdr(SLOF(req_optional[i].name), 1); i++; } i = 0; while (resp_optional[i].name) { resp_optional[i].id = lookup_hdr(SLOF(resp_optional[i].name), 1); i++; } i = 0; while (req_skipval[i].name) { req_skipval[i].id = lookup_hdr(SLOF(req_skipval[i].name), 1); i++; } i = 0; while (resp_skipval[i].name) { resp_skipval[i].id = lookup_hdr(SLOF(resp_skipval[i].name), 1); i++; } i = 0; while (req_common[i].name) { req_common[i].id = lookup_hdr(SLOF(req_common[i].name), 1); i++; } i = 0; while (resp_common[i].name) { resp_common[i].id = lookup_hdr(SLOF(resp_common[i].name), 1); i++; } } /* Find match for a signature. */ static void http_find_match(u8 to_srv, struct http_sig* ts, u8 dupe_det) { struct http_sig_record* gmatch = NULL; struct http_sig_record* ref = sigs[to_srv]; u32 cnt = sig_cnt[to_srv]; while (cnt--) { struct http_sig* rs = ref->sig; u32 ts_hdr = 0, rs_hdr = 0; if (rs->http_ver != -1 && rs->http_ver != ts->http_ver) goto next_sig; /* Check that all the headers listed for the p0f.fp signature (probably) appear in the examined traffic. */ if ((ts->hdr_bloom4 & rs->hdr_bloom4) != rs->hdr_bloom4) goto next_sig; /* Confirm the ordering and values of headers (this is relatively slow, hence the Bloom filter first). */ while (rs_hdr < rs->hdr_cnt) { u32 orig_ts = ts_hdr; while (rs->hdr[rs_hdr].id != ts->hdr[ts_hdr].id && ts_hdr < ts->hdr_cnt) ts_hdr++; if (ts_hdr == ts->hdr_cnt) { if (!rs->hdr[rs_hdr].optional) goto next_sig; /* If this is an optional header, check that it doesn't appear anywhere else. */ for (ts_hdr = 0; ts_hdr < ts->hdr_cnt; ts_hdr++) if (rs->hdr[rs_hdr].id == ts->hdr[ts_hdr].id) goto next_sig; ts_hdr = orig_ts; rs_hdr++; continue; } if (rs->hdr[rs_hdr].value && (!ts->hdr[ts_hdr].value || !strstr((char*)ts->hdr[ts_hdr].value, (char*)rs->hdr[rs_hdr].value))) goto next_sig; ts_hdr++; rs_hdr++; } /* Check that the headers forbidden in p0f.fp don't appear in the traffic. We first check if they seem to appear in ts->hdr_bloom4, and only if so, we do a full check. */ for (rs_hdr = 0; rs_hdr < rs->miss_cnt; rs_hdr++) { u64 miss_bloom4 = bloom4_64(rs->miss[rs_hdr]); if ((ts->hdr_bloom4 & miss_bloom4) != miss_bloom4) continue; /* Okay, possible instance of a banned header - scan list... */ for (ts_hdr = 0; ts_hdr < ts->hdr_cnt; ts_hdr++) if (rs->miss[rs_hdr] == ts->hdr[ts_hdr].id) goto next_sig; } /* When doing dupe detection, we want to allow a signature with additional banned headers to precede one with fewer, or with a different set. */ if (dupe_det) { if (rs->miss_cnt > ts->miss_cnt) goto next_sig; for (rs_hdr = 0; rs_hdr < rs->miss_cnt; rs_hdr++) { for (ts_hdr = 0; ts_hdr < ts->miss_cnt; ts_hdr++) if (rs->miss[rs_hdr] == ts->miss[ts_hdr]) break; /* One of the reference headers doesn't appear in current sig! */ if (ts_hdr == ts->miss_cnt) goto next_sig; } } /* Whoa, a match. */ if (!ref->generic) { ts->matched = ref; if (rs->sw && ts->sw && !strstr((char*)ts->sw, (char*)rs->sw)) ts->dishonest = 1; return; } else if (!gmatch) gmatch = ref; next_sig: ref = ref + 1; } /* A generic signature is the best we could find. */ if (!dupe_det && gmatch) { ts->matched = gmatch; if (gmatch->sig->sw && ts->sw && !strstr((char*)ts->sw, (char*)gmatch->sig->sw)) ts->dishonest = 1; } } /* Register new HTTP signature. */ void http_register_sig(u8 to_srv, u8 generic, s32 sig_class, u32 sig_name, u8* sig_flavor, u32 label_id, u32* sys, u32 sys_cnt, u8* val, u32 line_no) { struct http_sig* hsig; struct http_sig_record* hrec; u8* nxt; hsig = DFL_ck_alloc(sizeof(struct http_sig)); sigs[to_srv] = DFL_ck_realloc(sigs[to_srv], sizeof(struct http_sig_record) * (sig_cnt[to_srv] + 1)); hrec = &sigs[to_srv][sig_cnt[to_srv]]; if (val[1] != ':') FATAL("Malformed signature in line %u.", line_no); /* http_ver */ switch (*val) { case '0': break; case '1': hsig->http_ver = 1; break; case '*': hsig->http_ver = -1; break; default: FATAL("Bad HTTP version in line %u.", line_no); } val += 2; /* horder */ while (*val != ':') { u32 id; u8 optional = 0; if (hsig->hdr_cnt >= HTTP_MAX_HDRS) FATAL("Too many headers listed in line %u.", line_no); nxt = val; if (*nxt == '?') { optional = 1; val++; nxt++; } while (isalnum(*nxt) || *nxt == '-' || *nxt == '_') nxt++; if (val == nxt) FATAL("Malformed header name in line %u.", line_no); id = lookup_hdr(val, nxt - val, 1); hsig->hdr[hsig->hdr_cnt].id = id; hsig->hdr[hsig->hdr_cnt].optional = optional; if (!optional) hsig->hdr_bloom4 |= bloom4_64(id); val = nxt; if (*val == '=') { if (val[1] != '[') FATAL("Missing '[' after '=' in line %u.", line_no); val += 2; nxt = val; while (*nxt && *nxt != ']') nxt++; if (val == nxt || !*nxt) FATAL("Malformed signature in line %u.", line_no); hsig->hdr[hsig->hdr_cnt].value = DFL_ck_memdup_str(val, nxt - val); val = nxt + 1; } hsig->hdr_cnt++; if (*val == ',') val++; else if (*val != ':') FATAL("Malformed signature in line %u.", line_no); } val++; /* habsent */ while (*val != ':') { u32 id; if (hsig->miss_cnt >= HTTP_MAX_HDRS) FATAL("Too many headers listed in line %u.", line_no); nxt = val; while (isalnum(*nxt) || *nxt == '-' || *nxt == '_') nxt++; if (val == nxt) FATAL("Malformed header name in line %u.", line_no); id = lookup_hdr(val, nxt - val, 1); hsig->miss[hsig->miss_cnt] = id; val = nxt; hsig->miss_cnt++; if (*val == ',') val++; else if (*val != ':') FATAL("Malformed signature in line %u.", line_no); } val++; /* exp_sw */ if (*val) { if (strchr((char*)val, ':')) FATAL("Malformed signature in line %u.", line_no); hsig->sw = DFL_ck_strdup(val); } http_find_match(to_srv, hsig, 1); if (hsig->matched) FATAL("Signature in line %u is already covered by line %u.", line_no, hsig->matched->line_no); hrec->class_id = sig_class; hrec->name_id = sig_name; hrec->flavor = sig_flavor; hrec->label_id = label_id; hrec->sys = sys; hrec->sys_cnt = sys_cnt; hrec->line_no = line_no; hrec->generic = generic; hrec->sig = hsig; sig_cnt[to_srv]++; } /* Register new HTTP signature. */ void http_parse_ua(u8* val, u32 line_no) { u8* nxt; while (*val) { u32 id; u8* name = NULL; nxt = val; while (*nxt && (isalnum(*nxt) || strchr(NAME_CHARS, *nxt))) nxt++; if (val == nxt) FATAL("Malformed system name in line %u.", line_no); id = lookup_name_id(val, nxt - val); val = nxt; if (*val == '=') { if (val[1] != '[') FATAL("Missing '[' after '=' in line %u.", line_no); val += 2; nxt = val; while (*nxt && *nxt != ']') nxt++; if (val == nxt || !*nxt) FATAL("Malformed signature in line %u.", line_no); name = DFL_ck_memdup_str(val, nxt - val); val = nxt + 1; } ua_map = DFL_ck_realloc(ua_map, (ua_map_cnt + 1) * sizeof(struct ua_map_record)); ua_map[ua_map_cnt].id = id; if (!name) ua_map[ua_map_cnt].name = fp_os_names[id]; else ua_map[ua_map_cnt].name = name; ua_map_cnt++; if (*val == ',') val++; } } /* Dump a HTTP signature. */ static u8* dump_sig(u8 to_srv, struct http_sig* hsig) { u32 i; u8 had_prev = 0; struct http_id* list; u8 tmp[HTTP_MAX_SHOW + 1]; u32 tpos; static u8* ret; u32 rlen = 0; u8* val; #define RETF(_par...) do { \ s32 _len = snprintf(NULL, 0, _par); \ if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \ ret = DFL_ck_realloc_kb(ret, rlen + _len + 1); \ snprintf((char*)ret + rlen, _len + 1, _par); \ rlen += _len; \ } while (0) RETF("%u:", hsig->http_ver); for (i = 0; i < hsig->hdr_cnt; i++) { if (hsig->hdr[i].id >= 0) { u8 optional = 0; /* Check the "optional" list. */ list = to_srv ? req_optional : resp_optional; while (list->name) { if (list->id == hsig->hdr[i].id) break; list++; } if (list->name) optional = 1; RETF("%s%s%s", had_prev ? "," : "", optional ? "?" : "", hdr_names[hsig->hdr[i].id]); had_prev = 1; if (!(val = hsig->hdr[i].value)) continue; /* Next, make sure that the value is not on the ignore list. */ if (optional) continue; list = to_srv ? req_skipval : resp_skipval; while (list->name) { if (list->id == hsig->hdr[i].id) break; list++; } if (list->name) continue; /* Looks like it's not on the list, so let's output a cleaned-up version up to HTTP_MAX_SHOW. */ tpos = 0; while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 && val[tpos] != ']' && val[tpos] != '|') { tmp[tpos] = val[tpos]; tpos++; } tmp[tpos] = 0; if (!tpos) continue; RETF("=[%s]", tmp); } else { RETF("%s%s", had_prev ? "," : "", hsig->hdr[i].name); had_prev = 1; if (!(val = hsig->hdr[i].value)) continue; tpos = 0; while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 && val[tpos] != ']') { tmp[tpos] = val[tpos]; tpos++; } tmp[tpos] = 0; if (!tpos) continue; RETF("=[%s]", tmp); } } RETF(":"); list = to_srv ? req_common : resp_common; had_prev = 0; while (list->name) { for (i = 0; i < hsig->hdr_cnt; i++) if (hsig->hdr[i].id == list->id) break; if (i == hsig->hdr_cnt) { RETF("%s%s", had_prev ? "," : "", list->name); had_prev = 1; } list++; } RETF(":"); if ((val = hsig->sw)) { tpos = 0; while (tpos < HTTP_MAX_SHOW && val[tpos] >= 0x20 && val[tpos] < 0x80 && val[tpos] != ']') { tmp[tpos] = val[tpos]; tpos++; } tmp[tpos] = 0; if (tpos) RETF("%s", tmp); } return ret; } /* Dump signature flags. */ static u8* dump_flags(struct http_sig* hsig, struct http_sig_record* m) { static u8* ret; u32 rlen = 0; RETF(""); if (hsig->dishonest) RETF(" dishonest"); if (!hsig->sw) RETF(" anonymous"); if (m && m->generic) RETF(" generic"); #undef RETF if (*ret) return ret + 1; else return (u8*)"none"; } /* Score signature differences. For unknown signatures, the presumption is that they identify apps, so the logic is quite different from TCP. */ static void score_nat(u8 to_srv, struct packet_flow* f) { struct http_sig_record* m = f->http_tmp.matched; struct host_data* hd; struct http_sig* ref; u8 score = 0, diff_already = 0; u16 reason = 0; if (to_srv) { hd = f->client; ref = hd->http_req_os; } else { hd = f->server; ref = hd->http_resp; /* If the signature is for a different port, don't read too much into it. */ if (hd->http_resp_port != f->srv_port) ref = NULL; } if (!m) { /* No match. The user is probably running another app; this is only of interest if a server progresses from known to unknown. We can't compare two unknown server sigs with that much confidence. */ if (!to_srv && ref && ref->matched) { DEBUG("[#] HTTP server signature changed from known to unknown.\n"); score += 4; reason |= NAT_TO_UNK; } goto header_check; } if (m->class_id == -1) { /* Got a match for an application signature. Make sure it runs on the OS we have on file... */ verify_tool_class(to_srv, f, m->sys, m->sys_cnt); /* ...and check for inconsistencies in server behavior. */ if (!to_srv && ref && ref->matched) { if (ref->matched->name_id != m->name_id) { DEBUG("[#] Name on the matched HTTP server signature changes.\n"); score += 8; reason |= NAT_APP_LB; } else if (ref->matched->label_id != m->label_id) { DEBUG("[#] Label on the matched HTTP server signature changes.\n"); score += 2; reason |= NAT_APP_LB; } } } else { /* Ooh, spiffy: a match for an OS signature! There will be about two uses for this code, ever. */ if (ref && ref->matched) { if (ref->matched->name_id != m->name_id) { DEBUG("[#] Name on the matched HTTP OS signature changes.\n"); score += 8; reason |= NAT_OS_SIG; diff_already = 1; } else if (ref->matched->name_id != m->label_id) { DEBUG("[#] Label on the matched HTTP OS signature changes.\n"); score += 2; reason |= NAT_OS_SIG; } } else if (ref) { DEBUG("[#] HTTP OS signature changed from unknown to known.\n"); score += 4; reason |= NAT_TO_UNK; } /* If we haven't pointed out anything major yet, also complain if the signature doesn't match host data. */ if (!diff_already && hd->last_name_id != m->name_id) { DEBUG("[#] Matched HTTP OS signature different than host data.\n"); score += 4; reason |= NAT_OS_SIG; } } /* If we have determined that U-A looks legit, but the OS doesn't match, that's a clear sign of trouble. */ if (to_srv && m->class_id == -1 && f->http_tmp.sw && !f->http_tmp.dishonest) { u32 i; for (i = 0; i < ua_map_cnt; i++) if (strstr((char*)f->http_tmp.sw, (char*)ua_map[i].name)) break; if (i != ua_map_cnt) { if (ua_map[i].id != hd->last_name_id) { DEBUG("[#] Otherwise plausible User-Agent points to another OS.\n"); score += 4; reason |= NAT_APP_UA; if (!hd->bad_sw) hd->bad_sw = 1; } else { DEBUG("[#] User-Agent OS value checks out.\n"); hd->bad_sw = 0; } } } header_check: /* Okay, some last-resort checks. This is obviously concerning: */ if (f->http_tmp.via) { DEBUG("[#] Explicit use of Via or X-Forwarded-For.\n"); score += 8; reason |= NAT_APP_VIA; } /* Last but not least, see what happened to 'Date': */ if (ref && !to_srv && ref->date && f->http_tmp.date) { s64 recv_diff = ((s64)f->http_tmp.recv_date) - ref->recv_date; s64 hdr_diff = ((s64)f->http_tmp.date) - ref->date; if (hdr_diff < -HTTP_MAX_DATE_DIFF || hdr_diff > recv_diff + HTTP_MAX_DATE_DIFF) { DEBUG("[#] HTTP 'Date' distance too high (%lld in %lld sec).\n", hdr_diff, recv_diff); score += 4; reason |= NAT_APP_DATE; } else { DEBUG("[#] HTTP 'Date' distance seems fine (%lld in %lld sec).\n", hdr_diff, recv_diff); } } add_nat_score(to_srv, f, reason, score); } /* Look up HTTP signature, create an observation. */ static void fingerprint_http(u8 to_srv, struct packet_flow* f) { struct http_sig_record* m; u8* lang = NULL; http_find_match(to_srv, &f->http_tmp, 0); start_observation(to_srv ? "http request" : "http response", 4, to_srv, f); if ((m = f->http_tmp.matched)) { OBSERVF((m->class_id < 0) ? "app" : "os", "%s%s%s", fp_os_names[m->name_id], m->flavor ? " " : "", m->flavor ? m->flavor : (u8*)""); } else add_observation_field("app", NULL); if (f->http_tmp.lang && isalpha(f->http_tmp.lang[0]) && isalpha(f->http_tmp.lang[1]) && !isalpha(f->http_tmp.lang[2])) { u8 lh = LANG_HASH(f->http_tmp.lang[0], f->http_tmp.lang[1]); u8 pos = 0; while (languages[lh][pos]) { if (f->http_tmp.lang[0] == languages[lh][pos][0] && f->http_tmp.lang[1] == languages[lh][pos][1]) break; pos += 2; } if (!languages[lh][pos]) add_observation_field("lang", NULL); else add_observation_field("lang", (lang = (u8*)languages[lh][pos + 1])); } else add_observation_field("lang", (u8*)"none"); add_observation_field("params", dump_flags(&f->http_tmp, m)); add_observation_field("raw_sig", dump_sig(to_srv, &f->http_tmp)); score_nat(to_srv, f); /* Save observations needed to score future responses. */ if (!to_srv) { /* For server response, always store the signature. */ ck_free(f->server->http_resp); f->server->http_resp = ck_memdup(&f->http_tmp, sizeof(struct http_sig)); f->server->http_resp->hdr_cnt = 0; f->server->http_resp->sw = NULL; f->server->http_resp->lang = NULL; f->server->http_resp->via = NULL; f->server->http_resp_port = f->srv_port; if (lang) f->server->language = lang; if (m) { if (m->class_id != -1) { /* If this is an OS signature, update host record. */ f->server->last_class_id = m->class_id; f->server->last_name_id = m->name_id; f->server->last_flavor = m->flavor; f->server->last_quality = (m->generic * P0F_MATCH_GENERIC); } else { /* Otherwise, record app data. */ f->server->http_name_id = m->name_id; f->server->http_flavor = m->flavor; if (f->http_tmp.dishonest) f->server->bad_sw = 2; } } } else { if (lang) f->client->language = lang; if (m) { if (m->class_id != -1) { /* Client request - only OS sig is of any note. */ ck_free(f->client->http_req_os); f->client->http_req_os = ck_memdup(&f->http_tmp, sizeof(struct http_sig)); f->client->http_req_os->hdr_cnt = 0; f->client->http_req_os->sw = NULL; f->client->http_req_os->lang = NULL; f->client->http_req_os->via = NULL; f->client->last_class_id = m->class_id; f->client->last_name_id = m->name_id; f->client->last_flavor = m->flavor; f->client->last_quality = (m->generic * P0F_MATCH_GENERIC); } else { /* Record app data for the API. */ f->client->http_name_id = m->name_id; f->client->http_flavor = m->flavor; if (f->http_tmp.dishonest) f->client->bad_sw = 2; } } } } /* Free up any allocated strings in http_sig. */ void free_sig_hdrs(struct http_sig* h) { u32 i; for (i = 0; i < h->hdr_cnt; i++) { if (h->hdr[i].name) ck_free(h->hdr[i].name); if (h->hdr[i].value) ck_free(h->hdr[i].value); } } /* Parse HTTP date field. */ static u32 parse_date(u8* str) { struct tm t; if (!strptime((char*)str, "%a, %d %b %Y %H:%M:%S %Z", &t)) { DEBUG("[#] Invalid 'Date' field ('%s').\n", str); return 0; } return mktime(&t); } /* Parse name=value pairs into a signature. */ static u8 parse_pairs(u8 to_srv, struct packet_flow* f, u8 can_get_more) { u32 plen = to_srv ? f->req_len : f->resp_len; u32 off; /* Try to parse name: value pairs. */ while ((off = f->http_pos) < plen) { u8* pay = to_srv ? f->request : f->response; u32 nlen, vlen, vstart; s32 hid; u32 hcount; /* Empty line? Dispatch for fingerprinting! */ if (pay[off] == '\r' || pay[off] == '\n') { f->http_tmp.recv_date = get_unix_time(); fingerprint_http(to_srv, f); /* If this is a request, flush the collected signature and prepare for parsing the response. If it's a response, just shut down HTTP parsing on this flow. */ if (to_srv) { f->http_req_done = 1; f->http_pos = 0; free_sig_hdrs(&f->http_tmp); memset(&f->http_tmp, 0, sizeof(struct http_sig)); return 1; } else { f->in_http = -1; return 0; } } /* Looks like we're getting a header value. See if we have room for it. */ if ((hcount = f->http_tmp.hdr_cnt) >= HTTP_MAX_HDRS) { DEBUG("[#] Too many HTTP headers in a %s.\n", to_srv ? "request" : "response"); f->in_http = -1; return 0; } /* Try to extract header name. */ nlen = 0; while ((isalnum(pay[off]) || pay[off] == '-' || pay[off] == '_') && off < plen && nlen <= HTTP_MAX_HDR_NAME) { off++; nlen++; } if (off == plen) { if (!can_get_more) { DEBUG("[#] End of HTTP %s before end of headers.\n", to_srv ? "request" : "response"); f->in_http = -1; } return can_get_more; } /* Empty, excessively long, or non-':'-followed header name? */ if (!nlen || pay[off] != ':' || nlen > HTTP_MAX_HDR_NAME) { DEBUG("[#] Invalid HTTP header encountered (len = %u, char = 0x%02x).\n", nlen, pay[off]); f->in_http = -1; return 0; } /* At this point, header name starts at f->http_pos, and has nlen bytes. Skip ':' and a subsequent whitespace next. */ off++; if (off < plen && isblank(pay[off])) off++; vstart = off; vlen = 0; /* Find the next \n. */ while (off < plen && vlen <= HTTP_MAX_HDR_VAL && pay[off] != '\n') { off++; vlen++; } if (vlen > HTTP_MAX_HDR_VAL) { DEBUG("[#] HTTP %s header value length exceeded.\n", to_srv ? "request" : "response"); f->in_http = -1; return -1; } if (off == plen) { if (!can_get_more) { DEBUG("[#] End of HTTP %s before end of headers.\n", to_srv ? "request" : "response"); f->in_http = -1; } return can_get_more; } /* If party is using \r\n terminators, go back one char. */ if (pay[off - 1] == '\r') vlen--; /* Header value starts at vstart, and has vlen bytes (may be zero). Record this in the signature. */ hid = lookup_hdr(pay + f->http_pos, nlen, 0); f->http_tmp.hdr[hcount].id = hid; if (hid < 0) { /* Header ID not found, store literal value. */ f->http_tmp.hdr[hcount].name = ck_memdup_str(pay + f->http_pos, nlen); } else { /* Found - update Bloom filter. */ f->http_tmp.hdr_bloom4 |= bloom4_64(hid); } /* If there's a value, store that too. For U-A and Server, also update 'sw'; and for requests, collect Accept-Language. */ if (vlen) { u8* val = ck_memdup_str(pay + vstart, vlen); f->http_tmp.hdr[hcount].value = val; if (to_srv) { switch (hid) { case HDR_UA: f->http_tmp.sw = val; break; case HDR_AL: f->http_tmp.lang = val; break; case HDR_VIA: case HDR_XFF: f->http_tmp.via = val; break; } } else { switch (hid) { case HDR_SRV: f->http_tmp.sw = val; break; case HDR_DAT: f->http_tmp.date = parse_date(val); break; case HDR_VIA: case HDR_XFF: f->http_tmp.via = val; break; } } } /* Moving on... */ f->http_tmp.hdr_cnt++; f->http_pos = off + 1; } if (!can_get_more) { DEBUG("[#] End of HTTP %s before end of headers.\n", to_srv ? "request" : "response"); f->in_http = -1; } return can_get_more; } /* Examine request or response; returns 1 if more data needed and plausibly can be read. Note that the buffer is always NUL-terminated. */ u8 process_http(u8 to_srv, struct packet_flow* f) { /* Already decided this flow is not worth tracking? */ if (f->in_http < 0) return 0; if (to_srv) { u8* pay = f->request; u8 can_get_more = (f->req_len < MAX_FLOW_DATA); u32 off; /* Request done, but pending response? */ if (f->http_req_done) return 1; if (!f->in_http) { u8 chr; u8* sig_at; /* Ooh, new flow! */ if (f->req_len < 15) return can_get_more; /* Scan until \n, or until binary data spotted. */ off = f->http_pos; /* We only care about GET and HEAD requests at this point. */ if (!off && strncmp((char*)pay, "GET /", 5) && strncmp((char*)pay, "HEAD /", 6)) { DEBUG("[#] Does not seem like a GET / HEAD request.\n"); f->in_http = -1; return 0; } while (off < f->req_len && off < HTTP_MAX_URL && (chr = pay[off]) != '\n') { if (chr != '\r' && (chr < 0x20 || chr > 0x7f)) { DEBUG("[#] Not HTTP - character 0x%02x encountered.\n", chr); f->in_http = -1; return 0; } off++; } /* Newline too far or too close? */ if (off == HTTP_MAX_URL || off < 14) { DEBUG("[#] Not HTTP - newline offset %u.\n", off); f->in_http = -1; return 0; } /* Not enough data yet? */ if (off == f->req_len) { f->http_pos = off; if (!can_get_more) { DEBUG("[#] Not HTTP - no opening line found.\n"); f->in_http = -1; } return can_get_more; } sig_at = pay + off - 8; if (pay[off - 1] == '\r') sig_at--; /* Bad HTTP/1.x signature? */ if (strncmp((char*)sig_at, "HTTP/1.", 7)) { DEBUG("[#] Not HTTP - bad signature.\n"); f->in_http = -1; return 0; } f->http_tmp.http_ver = (sig_at[7] == '1'); f->in_http = 1; f->http_pos = off + 1; DEBUG("[#] HTTP detected.\n"); } return parse_pairs(1, f, can_get_more); } else { u8* pay = f->response; u8 can_get_more = (f->resp_len < MAX_FLOW_DATA); u32 off; /* Response before request? Bail out. */ if (!f->in_http || !f->http_req_done) { f->in_http = -1; return 0; } if (!f->http_gotresp1) { u8 chr; if (f->resp_len < 13) return can_get_more; /* Scan until \n, or until binary data spotted. */ off = f->http_pos; while (off < f->resp_len && off < HTTP_MAX_URL && (chr = pay[off]) != '\n') { if (chr != '\r' && (chr < 0x20 || chr > 0x7f)) { DEBUG("[#] Invalid HTTP response - character 0x%02x encountered.\n", chr); f->in_http = -1; return 0; } off++; } /* Newline too far or too close? */ if (off == HTTP_MAX_URL || off < 13) { DEBUG("[#] Invalid HTTP response - newline offset %u.\n", off); f->in_http = -1; return 0; } /* Not enough data yet? */ if (off == f->resp_len) { f->http_pos = off; if (!can_get_more) { DEBUG("[#] Invalid HTTP response - no opening line found.\n"); f->in_http = -1; } return can_get_more; } /* Bad HTTP/1.x signature? */ if (strncmp((char*)pay, "HTTP/1.", 7)) { DEBUG("[#] Invalid HTTP response - bad signature.\n"); f->in_http = -1; return 0; } f->http_tmp.http_ver = (pay[7] == '1'); f->http_pos = off + 1; DEBUG("[#] HTTP response starts correctly.\n"); } return parse_pairs(0, f, can_get_more); } }