1 /** @file
2 
3   This class extends the logging system interface as implemented by the
4   HttpStateMachineGet class.
5 
6   @section license License
7 
8   Licensed to the Apache Software Foundation (ASF) under one
9   or more contributor license agreements.  See the NOTICE file
10   distributed with this work for additional information
11   regarding copyright ownership.  The ASF licenses this file
12   to you under the Apache License, Version 2.0 (the
13   "License"); you may not use this file except in compliance
14   with the License.  You may obtain a copy of the License at
15 
16       http://www.apache.org/licenses/LICENSE-2.0
17 
18   Unless required by applicable law or agreed to in writing, software
19   distributed under the License is distributed on an "AS IS" BASIS,
20   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21   See the License for the specific language governing permissions and
22   limitations under the License.
23  */
24 
25 #pragma once
26 
#include <cstdint>
#include <cstring>

#include "tscore/ink_align.h"
#include "LogField.h"
29 
30 class HTTPHdr;
31 class HttpSM;
32 class IpClass;
33 union IpEndpoint;
34 
35 /*-------------------------------------------------------------------------
36   LogAccess
37 
38   This class defines the logging system interface for extracting
39   information required to process a log entry.  This accessor is
40   implemented as an abstract base class with functions for
41   accessing the data based on the derived class.
42 
43   Each function has the ability to marshal its data into a buffer that is
44   provided, and return the number of bytes that were marshalled.  In the
45   absence of a marshalling buffer, the routines will simply return the
46   number of bytes that would be needed to marshal.  This allows for the
47   same functions to be used for both buffer length computations and data
48   movement.
49 
  Logging deals with values of just two possible data types: integers
  (including enum) and strings.  Because the integers are multi-byte
  values that might have special alignment requirements when being
  marshalled, this base class provides a static member function called
  marshal_int() that writes the value into the buffer.  Callers check for
  a NULL buffer before calling it, as shown below.  The template for
  implementing integer and enum marshalling routines is:
56 
57       int marshal_some_int_value (char *buf)
58       {
59           if (buf) {
60               int64_t val = what_the_value_should_be;
61               marshal_int (buf, val);
62           }
63           return INK_MIN_ALIGN;
64       }
65 
  String values don't need byte swapping, but we do want to ensure things
  like trailing NULs and padding.  The best way to do this is to provide a
  marshalling routine that takes a destination buffer, a string, and its
  padded length, and makes sure the string is copied into the destination
  buffer with a trailing NUL.  We've also provided our own strlen()
  function within the class that accounts for the trailing NUL and the
  alignment padding.  So, here is how it would look to actually use these
  functions for marshalling a string value:
73 
74       int marshal_some_string_value (char *buf)
75       {
76           char *str = compute_or_locate_string_value ();
77           int len = LogAccess::strlen (str);
78           if (buf) {
79               marshal_str (buf, str, len);
80           }
81           return len;
82       }
83 
84   -------------------------------------------------------------------------*/
85 
// Default string and its length (excluding the terminating NUL).
// LogAccess::strlen() sizes null or empty strings as sizeof(DEFAULT_STR).
//
// DEFAULT_STR_LEN MUST be less than INK_MIN_ALIGN
#define DEFAULT_STR "-"
#define DEFAULT_STR_LEN 1

// DEFAULT_STR_LEN is maintained by hand; fail the build if it ever drifts
// away from the literal it describes.
static_assert(sizeof(DEFAULT_STR) - 1 == DEFAULT_STR_LEN, "DEFAULT_STR_LEN must equal the length of DEFAULT_STR");
#ifdef INK_MIN_ALIGN
// Enforce the documented requirement whenever the alignment macro is visible.
static_assert(DEFAULT_STR_LEN < INK_MIN_ALIGN, "DEFAULT_STR_LEN must be less than INK_MIN_ALIGN");
#endif

// Defined in another translation unit.
extern char INVALID_STR[];
91 
// Kinds of log entry.
enum LogEntryType {
  LOG_ENTRY_HTTP = 0,
  N_LOG_ENTRY_TYPES, // count sentinel: equals the number of entries above, keep last
};
96 
// Finish status codes.  The numeric values are spelled out so they are
// visible at a glance; the trailing sentinel is left implicit so that it
// keeps tracking the number of real codes.
enum LogFinishCodeType {
  LOG_FINISH_FIN     = 0,
  LOG_FINISH_INTR    = 1,
  LOG_FINISH_TIMEOUT = 2,
  N_LOG_FINISH_CODE_TYPES, // count sentinel, keep last
};
103 
// Cache write result codes.  The numeric values are spelled out so they are
// visible at a glance; the trailing sentinel is left implicit so that it
// keeps tracking the number of real codes.
enum LogCacheWriteCodeType {
  LOG_CACHE_WRITE_NONE         = 0,
  LOG_CACHE_WRITE_LOCK_MISSED  = 1,
  LOG_CACHE_WRITE_LOCK_ABORTED = 2,
  LOG_CACHE_WRITE_ERROR        = 3,
  LOG_CACHE_WRITE_COMPLETE     = 4,
  N_LOG_CACHE_WRITE_TYPES, // count sentinel, keep last
};
112 
113 class LogAccess
114 {
115 public:
116   inkcoreapi
LogAccess()117   LogAccess()
118   {
119   }
120 
121   explicit LogAccess(HttpSM *sm);
122 
~LogAccess()123   inkcoreapi ~LogAccess() {}
124   inkcoreapi void init();
125 
126   //
127   // client -> proxy fields
128   //
129   inkcoreapi int marshal_client_host_ip(char *);                // STR
130   inkcoreapi int marshal_host_interface_ip(char *);             // STR
131   inkcoreapi int marshal_client_host_port(char *);              // INT
132   inkcoreapi int marshal_client_auth_user_name(char *);         // STR
133   inkcoreapi int marshal_client_req_timestamp_sec(char *);      // INT
134   inkcoreapi int marshal_client_req_timestamp_ms(char *);       // INT
135   inkcoreapi int marshal_client_req_text(char *);               // STR
136   inkcoreapi int marshal_client_req_http_method(char *);        // STR
137   inkcoreapi int marshal_client_req_url(char *);                // STR
138   inkcoreapi int marshal_client_req_url_canon(char *);          // STR
139   inkcoreapi int marshal_client_req_unmapped_url_canon(char *); // STR
140   inkcoreapi int marshal_client_req_unmapped_url_path(char *);  // STR
141   inkcoreapi int marshal_client_req_unmapped_url_host(char *);  // STR
142   inkcoreapi int marshal_client_req_url_path(char *);           // STR
143   inkcoreapi int marshal_client_req_url_scheme(char *);         // STR
144   inkcoreapi int marshal_client_req_http_version(char *);       // INT
145   inkcoreapi int marshal_client_req_protocol_version(char *);   // STR
146   inkcoreapi int marshal_server_req_protocol_version(char *);   // STR
147   inkcoreapi int marshal_client_req_squid_len(char *);          // INT
148   inkcoreapi int marshal_client_req_header_len(char *);         // INT
149   inkcoreapi int marshal_client_req_content_len(char *);        // INT
150   inkcoreapi int marshal_client_req_tcp_reused(char *);         // INT
151   inkcoreapi int marshal_client_req_is_ssl(char *);             // INT
152   inkcoreapi int marshal_client_req_ssl_reused(char *);         // INT
153   inkcoreapi int marshal_client_req_is_internal(char *);        // INT
154   inkcoreapi int marshal_client_req_mptcp_state(char *);        // INT
155   inkcoreapi int marshal_client_security_protocol(char *);      // STR
156   inkcoreapi int marshal_client_security_cipher_suite(char *);  // STR
157   inkcoreapi int marshal_client_security_curve(char *);         // STR
158   inkcoreapi int marshal_client_security_alpn(char *);          // STR
159   inkcoreapi int marshal_client_finish_status_code(char *);     // INT
160   inkcoreapi int marshal_client_req_id(char *);                 // INT
161   inkcoreapi int marshal_client_req_uuid(char *);               // STR
162   inkcoreapi int marshal_client_rx_error_code(char *);          // STR
163   inkcoreapi int marshal_client_tx_error_code(char *);          // STR
164   inkcoreapi int marshal_client_req_all_header_fields(char *);  // STR
165 
166   //
167   // proxy -> client fields
168   //
169   inkcoreapi int marshal_proxy_resp_content_type(char *);      // STR
170   inkcoreapi int marshal_proxy_resp_reason_phrase(char *);     // STR
171   inkcoreapi int marshal_proxy_resp_squid_len(char *);         // INT
172   inkcoreapi int marshal_proxy_resp_content_len(char *);       // INT
173   inkcoreapi int marshal_proxy_resp_status_code(char *);       // INT
174   inkcoreapi int marshal_proxy_resp_header_len(char *);        // INT
175   inkcoreapi int marshal_proxy_finish_status_code(char *);     // INT
176   inkcoreapi int marshal_cache_result_code(char *);            // INT
177   inkcoreapi int marshal_cache_result_subcode(char *);         // INT
178   inkcoreapi int marshal_proxy_host_port(char *);              // INT
179   inkcoreapi int marshal_cache_hit_miss(char *);               // INT
180   inkcoreapi int marshal_proxy_resp_all_header_fields(char *); // STR
181 
182   //
183   // proxy -> server fields
184   //
185   inkcoreapi int marshal_proxy_req_header_len(char *);        // INT
186   inkcoreapi int marshal_proxy_req_squid_len(char *);         // INT
187   inkcoreapi int marshal_proxy_req_content_len(char *);       // INT
188   inkcoreapi int marshal_proxy_req_server_ip(char *);         // INT
189   inkcoreapi int marshal_proxy_req_server_port(char *);       // INT
190   inkcoreapi int marshal_proxy_hierarchy_route(char *);       // INT
191   inkcoreapi int marshal_next_hop_ip(char *);                 // STR
192   inkcoreapi int marshal_next_hop_port(char *);               // INT
193   inkcoreapi int marshal_proxy_host_name(char *);             // STR
194   inkcoreapi int marshal_proxy_host_ip(char *);               // STR
195   inkcoreapi int marshal_proxy_req_is_ssl(char *);            // INT
196   inkcoreapi int marshal_proxy_req_all_header_fields(char *); // STR
197 
198   //
199   // server -> proxy fields
200   //
201   inkcoreapi int marshal_server_host_ip(char *);                // INT
202   inkcoreapi int marshal_server_host_name(char *);              // STR
203   inkcoreapi int marshal_server_resp_status_code(char *);       // INT
204   inkcoreapi int marshal_server_resp_squid_len(char *);         // INT
205   inkcoreapi int marshal_server_resp_content_len(char *);       // INT
206   inkcoreapi int marshal_server_resp_header_len(char *);        // INT
207   inkcoreapi int marshal_server_resp_http_version(char *);      // INT
208   inkcoreapi int marshal_server_resp_time_ms(char *);           // INT
209   inkcoreapi int marshal_server_resp_time_s(char *);            // INT
210   inkcoreapi int marshal_server_transact_count(char *);         // INT
211   inkcoreapi int marshal_server_connect_attempts(char *);       // INT
212   inkcoreapi int marshal_server_resp_all_header_fields(char *); // STR
213 
214   //
215   // cache -> client fields
216   //
217   inkcoreapi int marshal_cache_resp_status_code(char *);       // INT
218   inkcoreapi int marshal_cache_resp_squid_len(char *);         // INT
219   inkcoreapi int marshal_cache_resp_content_len(char *);       // INT
220   inkcoreapi int marshal_cache_resp_header_len(char *);        // INT
221   inkcoreapi int marshal_cache_resp_http_version(char *);      // INT
222   inkcoreapi int marshal_cache_resp_all_header_fields(char *); // STR
223 
224   inkcoreapi void set_client_req_url(char *, int);                // STR
225   inkcoreapi void set_client_req_url_canon(char *, int);          // STR
226   inkcoreapi void set_client_req_unmapped_url_canon(char *, int); // STR
227   inkcoreapi void set_client_req_unmapped_url_path(char *, int);  // STR
228   inkcoreapi void set_client_req_unmapped_url_host(char *, int);  // STR
229   inkcoreapi void set_client_req_url_path(char *, int);           // STR
230 
231   //
232   // congestion control -- client_retry_after_time
233   //
234   inkcoreapi int marshal_client_retry_after_time(char *); // INT
235 
236   //
237   // cache write fields
238   //
239   inkcoreapi int marshal_cache_write_code(char *);           // INT
240   inkcoreapi int marshal_cache_write_transform_code(char *); // INT
241 
242   // other fields
243   //
244   inkcoreapi int marshal_transfer_time_ms(char *);                            // INT
245   inkcoreapi int marshal_transfer_time_s(char *);                             // INT
246   inkcoreapi int marshal_file_size(char *);                                   // INT
247   inkcoreapi int marshal_plugin_identity_id(char *);                          // INT
248   inkcoreapi int marshal_plugin_identity_tag(char *);                         // STR
249   inkcoreapi int marshal_process_uuid(char *);                                // STR
250   inkcoreapi int marshal_client_http_connection_id(char *);                   // INT
251   inkcoreapi int marshal_client_http_transaction_id(char *);                  // INT
252   inkcoreapi int marshal_client_http_transaction_priority_weight(char *);     // INT
253   inkcoreapi int marshal_client_http_transaction_priority_dependence(char *); // INT
254   inkcoreapi int marshal_cache_lookup_url_canon(char *);                      // STR
255   inkcoreapi int marshal_client_sni_server_name(char *);                      // STR
256   inkcoreapi int marshal_client_provided_cert(char *);                        // INT
257   inkcoreapi int marshal_proxy_provided_cert(char *);                         // INT
258   inkcoreapi int marshal_version_build_number(char *);                        // STR
259   inkcoreapi int marshal_version_string(char *);                              // STR
260   inkcoreapi int marshal_cache_read_retries(char *);                          // INT
261   inkcoreapi int marshal_cache_write_retries(char *);                         // INT
262   inkcoreapi int marshal_cache_collapsed_connection_success(char *);          // INT
263   inkcoreapi int marshal_proxy_protocol_version(char *);                      // STR
264   inkcoreapi int marshal_proxy_protocol_src_ip(char *);                       // STR
265   inkcoreapi int marshal_proxy_protocol_dst_ip(char *);                       // STR
266 
267   // named fields from within a http header
268   //
269   inkcoreapi int marshal_http_header_field(LogField::Container container, char *field, char *buf);
270   inkcoreapi int marshal_http_header_field_escapify(LogField::Container container, char *field, char *buf);
271 
272   //
273   // named records.config int variables
274   //
275   int marshal_config_int_var(char *config_var, char *buf);
276 
277   //
278   // named records.config string variables
279   //
280   int marshal_config_str_var(char *config_var, char *buf);
281 
282   //
283   // generic record access
284   //
285   int marshal_record(char *record, char *buf);
286 
287   //
288   // milestones access
289   //
290   inkcoreapi int marshal_milestone(TSMilestonesType ms, char *buf);
291   inkcoreapi int marshal_milestone_fmt_sec(TSMilestonesType ms, char *buf);
292   inkcoreapi int marshal_milestone_fmt_squid(TSMilestonesType ms, char *buf);
293   inkcoreapi int marshal_milestone_fmt_netscape(TSMilestonesType ms, char *buf);
294   inkcoreapi int marshal_milestone_fmt_date(TSMilestonesType ms, char *buf);
295   inkcoreapi int marshal_milestone_fmt_time(TSMilestonesType ms, char *buf);
296   inkcoreapi int marshal_milestone_fmt_ms(TSMilestonesType ms, char *buf);
297   inkcoreapi int marshal_milestone_diff(TSMilestonesType ms1, TSMilestonesType ms2, char *buf);
298   inkcoreapi void set_http_header_field(LogField::Container container, char *field, char *buf, int len);
299   //
300   // unmarshalling routines
301   //
302   // They used to return a string; now they unmarshal directly into the
303   // destination buffer supplied.
304   //
305   static int64_t unmarshal_int(char **buf);
306   static int unmarshal_itoa(int64_t val, char *dest, int field_width = 0, char leading_char = ' ');
307   static int unmarshal_itox(int64_t val, char *dest, int field_width = 0, char leading_char = ' ');
308   static int unmarshal_int_to_str(char **buf, char *dest, int len);
309   static int unmarshal_int_to_str_hex(char **buf, char *dest, int len);
310   static int unmarshal_str(char **buf, char *dest, int len, LogSlice *slice = nullptr);
311   static int unmarshal_ttmsf(char **buf, char *dest, int len);
312   static int unmarshal_int_to_date_str(char **buf, char *dest, int len);
313   static int unmarshal_int_to_time_str(char **buf, char *dest, int len);
314   static int unmarshal_int_to_netscape_str(char **buf, char *dest, int len);
315   static int unmarshal_http_version(char **buf, char *dest, int len);
316   static int unmarshal_http_text(char **buf, char *dest, int len, LogSlice *slice = nullptr);
317   static int unmarshal_http_status(char **buf, char *dest, int len);
318   static int unmarshal_ip(char **buf, IpEndpoint *dest);
319   static int unmarshal_ip_to_str(char **buf, char *dest, int len);
320   static int unmarshal_ip_to_hex(char **buf, char *dest, int len);
321   static int unmarshal_hierarchy(char **buf, char *dest, int len, const Ptr<LogFieldAliasMap> &map);
322   static int unmarshal_finish_status(char **buf, char *dest, int len, const Ptr<LogFieldAliasMap> &map);
323   static int unmarshal_cache_code(char **buf, char *dest, int len, const Ptr<LogFieldAliasMap> &map);
324   static int unmarshal_cache_hit_miss(char **buf, char *dest, int len, const Ptr<LogFieldAliasMap> &map);
325   static int unmarshal_cache_write_code(char **buf, char *dest, int len, const Ptr<LogFieldAliasMap> &map);
326   static int unmarshal_client_protocol_stack(char **buf, char *dest, int len, Ptr<LogFieldAliasMap> map);
327 
328   static int unmarshal_with_map(int64_t code, char *dest, int len, const Ptr<LogFieldAliasMap> &map, const char *msg = nullptr);
329 
330   static int unmarshal_record(char **buf, char *dest, int len);
331 
332   //
333   // our own strlen function that pads strings to even int64_t boundaries
334   // so that there are no alignment problems with the int values.
335   //
336   static int round_strlen(int len);
337   static int strlen(const char *str);
338 
339 public:
340   inkcoreapi static void marshal_int(char *dest, int64_t source);
341   inkcoreapi static void marshal_str(char *dest, const char *source, int padded_len);
342   inkcoreapi static void marshal_mem(char *dest, const char *source, int actual_len, int padded_len);
343   inkcoreapi static int marshal_ip(char *dest, sockaddr const *ip);
344 
345   // noncopyable
346   // -- member functions that are not allowed --
347   LogAccess(const LogAccess &rhs) = delete;      // no copies
348   LogAccess &operator=(LogAccess &rhs) = delete; // or assignment
349 
350 private:
351   HttpSM *m_http_sm;
352 
353   Arena m_arena;
354 
355   HTTPHdr *m_client_request;
356   HTTPHdr *m_proxy_response;
357   HTTPHdr *m_proxy_request;
358   HTTPHdr *m_server_response;
359   HTTPHdr *m_cache_response;
360 
361   char *m_client_req_url_str;
362   int m_client_req_url_len;
363   char *m_client_req_url_canon_str;
364   int m_client_req_url_canon_len;
365   char *m_client_req_unmapped_url_canon_str;
366   int m_client_req_unmapped_url_canon_len;
367   char *m_client_req_unmapped_url_path_str;
368   int m_client_req_unmapped_url_path_len;
369   char *m_client_req_unmapped_url_host_str;
370   int m_client_req_unmapped_url_host_len;
371   char const *m_client_req_url_path_str;
372   int m_client_req_url_path_len;
373   char *m_proxy_resp_content_type_str;
374   int m_proxy_resp_content_type_len;
375   char *m_proxy_resp_reason_phrase_str;
376   int m_proxy_resp_reason_phrase_len;
377   char *m_cache_lookup_url_canon_str;
378   int m_cache_lookup_url_canon_len;
379 
380   void validate_unmapped_url();
381   void validate_unmapped_url_path();
382 
383   void validate_lookup_url();
384 };
385 
386 inline int
round_strlen(int len)387 LogAccess::round_strlen(int len)
388 {
389   return INK_ALIGN_DEFAULT(len);
390 }
391 
392 /*-------------------------------------------------------------------------
393   LogAccess::strlen
394 
395   Take trailing null and alignment padding into account.  This makes sure
396   that strings in the LogBuffer are laid out properly.
397   -------------------------------------------------------------------------*/
398 
399 inline int
strlen(const char * str)400 LogAccess::strlen(const char *str)
401 {
402   if (str == nullptr || str[0] == 0) {
403     return round_strlen(sizeof(DEFAULT_STR));
404   } else {
405     return (int)(round_strlen(((int)::strlen(str) + 1))); // actual bytes for string
406   }
407 }
408 
409 inline void
marshal_int(char * dest,int64_t source)410 LogAccess::marshal_int(char *dest, int64_t source)
411 {
412   // TODO: This used to do htonl on the source. TS-1156
413   *((int64_t *)dest) = source;
414 }
415 
/*-------------------------------------------------------------------------
  resolve_logfield_string

  This external function takes a format string and a LogAccess context and
  resolves any known fields to return a new, resolved string.

  @param context    LogAccess context used to resolve the fields.
  @param format_str format string whose known fields are resolved.
  @return the new, resolved string.

  NOTE(review): ownership of the returned buffer and the behaviour for a
  null argument are not visible from this declaration -- confirm against
  the definition.
  -------------------------------------------------------------------------*/

char *resolve_logfield_string(LogAccess *context, const char *format_str);
424