1 /*
2  *
3  * Copyright 2018 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/core/lib/iomgr/buffer_list.h"
22 #include "src/core/lib/iomgr/port.h"
23 
24 #include <grpc/support/log.h>
25 
26 #ifdef GRPC_LINUX_ERRQUEUE
27 #include <netinet/in.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "src/core/lib/gprpp/memory.h"
32 
33 namespace grpc_core {
34 namespace {
35 /** Fills gpr_timespec gts based on values from timespec ts */
fill_gpr_from_timestamp(gpr_timespec * gts,const struct timespec * ts)36 void fill_gpr_from_timestamp(gpr_timespec* gts, const struct timespec* ts) {
37   gts->tv_sec = ts->tv_sec;
38   gts->tv_nsec = static_cast<int32_t>(ts->tv_nsec);
39   gts->clock_type = GPR_CLOCK_REALTIME;
40 }
41 
default_timestamps_callback(void *,grpc_core::Timestamps *,grpc_error *)42 void default_timestamps_callback(void* /*arg*/, grpc_core::Timestamps* /*ts*/,
43                                  grpc_error* /*shudown_err*/) {
44   gpr_log(GPR_DEBUG, "Timestamps callback has not been registered");
45 }
46 
47 /** The saved callback function that will be invoked when we get all the
48  * timestamps that we are going to get for a TracedBuffer. */
49 void (*timestamps_callback)(void*, grpc_core::Timestamps*,
50                             grpc_error* shutdown_err) =
51     default_timestamps_callback;
52 
/* Copies sizeof(T) bytes starting at \a ptr into a T and returns it. Going
 * through memcpy instead of dereferencing a casted pointer avoids trouble with
 * unaligned reads; this is used to pull individual opt stats out of a cmsg. */
template <typename T>
T read_unaligned(const void* ptr) {
  T out;
  memcpy(static_cast<void*>(&out), ptr, sizeof(T));
  return out;
}
61 
62 /* Extracts opt stats from the tcp_info struct \a info to \a metrics */
extract_opt_stats_from_tcp_info(ConnectionMetrics * metrics,const grpc_core::tcp_info * info)63 void extract_opt_stats_from_tcp_info(ConnectionMetrics* metrics,
64                                      const grpc_core::tcp_info* info) {
65   if (info == nullptr) {
66     return;
67   }
68   if (info->length > offsetof(grpc_core::tcp_info, tcpi_sndbuf_limited)) {
69     metrics->recurring_retrans.emplace(info->tcpi_retransmits);
70     metrics->is_delivery_rate_app_limited.emplace(
71         info->tcpi_delivery_rate_app_limited);
72     metrics->congestion_window.emplace(info->tcpi_snd_cwnd);
73     metrics->reordering.emplace(info->tcpi_reordering);
74     metrics->packet_retx.emplace(info->tcpi_total_retrans);
75     metrics->pacing_rate.emplace(info->tcpi_pacing_rate);
76     metrics->data_notsent.emplace(info->tcpi_notsent_bytes);
77     if (info->tcpi_min_rtt != UINT32_MAX) {
78       metrics->min_rtt.emplace(info->tcpi_min_rtt);
79     }
80     metrics->packet_sent.emplace(info->tcpi_data_segs_out);
81     metrics->delivery_rate.emplace(info->tcpi_delivery_rate);
82     metrics->busy_usec.emplace(info->tcpi_busy_time);
83     metrics->rwnd_limited_usec.emplace(info->tcpi_rwnd_limited);
84     metrics->sndbuf_limited_usec.emplace(info->tcpi_sndbuf_limited);
85   }
86   if (info->length > offsetof(grpc_core::tcp_info, tcpi_dsack_dups)) {
87     metrics->data_sent.emplace(info->tcpi_bytes_sent);
88     metrics->data_retx.emplace(info->tcpi_bytes_retrans);
89     metrics->packet_spurious_retx.emplace(info->tcpi_dsack_dups);
90   }
91 }
92 
93 /** Extracts opt stats from the given control message \a opt_stats to the
94  * connection metrics \a metrics */
extract_opt_stats_from_cmsg(ConnectionMetrics * metrics,const cmsghdr * opt_stats)95 void extract_opt_stats_from_cmsg(ConnectionMetrics* metrics,
96                                  const cmsghdr* opt_stats) {
97   if (opt_stats == nullptr) {
98     return;
99   }
100   const auto* data = CMSG_DATA(opt_stats);
101   constexpr int64_t cmsg_hdr_len = CMSG_ALIGN(sizeof(struct cmsghdr));
102   const int64_t len = opt_stats->cmsg_len - cmsg_hdr_len;
103   int64_t offset = 0;
104 
105   while (offset < len) {
106     const auto* attr = reinterpret_cast<const nlattr*>(data + offset);
107     const void* val = data + offset + NLA_HDRLEN;
108     switch (attr->nla_type) {
109       case TCP_NLA_BUSY: {
110         metrics->busy_usec.emplace(read_unaligned<uint64_t>(val));
111         break;
112       }
113       case TCP_NLA_RWND_LIMITED: {
114         metrics->rwnd_limited_usec.emplace(read_unaligned<uint64_t>(val));
115         break;
116       }
117       case TCP_NLA_SNDBUF_LIMITED: {
118         metrics->sndbuf_limited_usec.emplace(read_unaligned<uint64_t>(val));
119         break;
120       }
121       case TCP_NLA_PACING_RATE: {
122         metrics->pacing_rate.emplace(read_unaligned<uint64_t>(val));
123         break;
124       }
125       case TCP_NLA_DELIVERY_RATE: {
126         metrics->delivery_rate.emplace(read_unaligned<uint64_t>(val));
127         break;
128       }
129       case TCP_NLA_DELIVERY_RATE_APP_LMT: {
130         metrics->is_delivery_rate_app_limited.emplace(
131             read_unaligned<uint8_t>(val));
132         break;
133       }
134       case TCP_NLA_SND_CWND: {
135         metrics->congestion_window.emplace(read_unaligned<uint32_t>(val));
136         break;
137       }
138       case TCP_NLA_MIN_RTT: {
139         metrics->min_rtt.emplace(read_unaligned<uint32_t>(val));
140         break;
141       }
142       case TCP_NLA_SRTT: {
143         metrics->srtt.emplace(read_unaligned<uint32_t>(val));
144         break;
145       }
146       case TCP_NLA_RECUR_RETRANS: {
147         metrics->recurring_retrans.emplace(read_unaligned<uint8_t>(val));
148         break;
149       }
150       case TCP_NLA_BYTES_SENT: {
151         metrics->data_sent.emplace(read_unaligned<uint64_t>(val));
152         break;
153       }
154       case TCP_NLA_DATA_SEGS_OUT: {
155         metrics->packet_sent.emplace(read_unaligned<uint64_t>(val));
156         break;
157       }
158       case TCP_NLA_TOTAL_RETRANS: {
159         metrics->packet_retx.emplace(read_unaligned<uint64_t>(val));
160         break;
161       }
162       case TCP_NLA_DELIVERED: {
163         metrics->packet_delivered.emplace(read_unaligned<uint32_t>(val));
164         break;
165       }
166       case TCP_NLA_DELIVERED_CE: {
167         metrics->packet_delivered_ce.emplace(read_unaligned<uint32_t>(val));
168         break;
169       }
170       case TCP_NLA_BYTES_RETRANS: {
171         metrics->data_retx.emplace(read_unaligned<uint64_t>(val));
172         break;
173       }
174       case TCP_NLA_DSACK_DUPS: {
175         metrics->packet_spurious_retx.emplace(read_unaligned<uint32_t>(val));
176         break;
177       }
178       case TCP_NLA_REORDERING: {
179         metrics->reordering.emplace(read_unaligned<uint32_t>(val));
180         break;
181       }
182       case TCP_NLA_SND_SSTHRESH: {
183         metrics->snd_ssthresh.emplace(read_unaligned<uint32_t>(val));
184         break;
185       }
186     }
187     offset += NLA_ALIGN(attr->nla_len);
188   }
189 }
190 
get_socket_tcp_info(grpc_core::tcp_info * info,int fd)191 static int get_socket_tcp_info(grpc_core::tcp_info* info, int fd) {
192   memset(info, 0, sizeof(*info));
193   info->length = sizeof(*info) - sizeof(socklen_t);
194   return getsockopt(fd, IPPROTO_TCP, TCP_INFO, info, &(info->length));
195 }
196 } /* namespace */
197 
AddNewEntry(TracedBuffer ** head,uint32_t seq_no,int fd,void * arg)198 void TracedBuffer::AddNewEntry(TracedBuffer** head, uint32_t seq_no, int fd,
199                                void* arg) {
200   GPR_DEBUG_ASSERT(head != nullptr);
201   TracedBuffer* new_elem = new TracedBuffer(seq_no, arg);
202   /* Store the current time as the sendmsg time. */
203   new_elem->ts_.sendmsg_time.time = gpr_now(GPR_CLOCK_REALTIME);
204   new_elem->ts_.scheduled_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
205   new_elem->ts_.sent_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
206   new_elem->ts_.acked_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
207 
208   if (get_socket_tcp_info(&new_elem->ts_.info, fd) == 0) {
209     extract_opt_stats_from_tcp_info(&new_elem->ts_.sendmsg_time.metrics,
210                                     &new_elem->ts_.info);
211   }
212   if (*head == nullptr) {
213     *head = new_elem;
214     return;
215   }
216   /* Append at the end. */
217   TracedBuffer* ptr = *head;
218   while (ptr->next_ != nullptr) {
219     ptr = ptr->next_;
220   }
221   ptr->next_ = new_elem;
222 }
223 
ProcessTimestamp(TracedBuffer ** head,struct sock_extended_err * serr,struct cmsghdr * opt_stats,struct scm_timestamping * tss)224 void TracedBuffer::ProcessTimestamp(TracedBuffer** head,
225                                     struct sock_extended_err* serr,
226                                     struct cmsghdr* opt_stats,
227                                     struct scm_timestamping* tss) {
228   GPR_DEBUG_ASSERT(head != nullptr);
229   TracedBuffer* elem = *head;
230   TracedBuffer* next = nullptr;
231   while (elem != nullptr) {
232     /* The byte number refers to the sequence number of the last byte which this
233      * timestamp relates to. */
234     if (serr->ee_data >= elem->seq_no_) {
235       switch (serr->ee_info) {
236         case SCM_TSTAMP_SCHED:
237           fill_gpr_from_timestamp(&(elem->ts_.scheduled_time.time),
238                                   &(tss->ts[0]));
239           extract_opt_stats_from_cmsg(&(elem->ts_.scheduled_time.metrics),
240                                       opt_stats);
241           elem = elem->next_;
242           break;
243         case SCM_TSTAMP_SND:
244           fill_gpr_from_timestamp(&(elem->ts_.sent_time.time), &(tss->ts[0]));
245           extract_opt_stats_from_cmsg(&(elem->ts_.sent_time.metrics),
246                                       opt_stats);
247           elem = elem->next_;
248           break;
249         case SCM_TSTAMP_ACK:
250           fill_gpr_from_timestamp(&(elem->ts_.acked_time.time), &(tss->ts[0]));
251           extract_opt_stats_from_cmsg(&(elem->ts_.acked_time.metrics),
252                                       opt_stats);
253           /* Got all timestamps. Do the callback and free this TracedBuffer.
254            * The thing below can be passed by value if we don't want the
255            * restriction on the lifetime. */
256           timestamps_callback(elem->arg_, &(elem->ts_), GRPC_ERROR_NONE);
257           next = elem->next_;
258           delete static_cast<TracedBuffer*>(elem);
259           *head = elem = next;
260           break;
261         default:
262           abort();
263       }
264     } else {
265       break;
266     }
267   }
268 }
269 
Shutdown(TracedBuffer ** head,void * remaining,grpc_error * shutdown_err)270 void TracedBuffer::Shutdown(TracedBuffer** head, void* remaining,
271                             grpc_error* shutdown_err) {
272   GPR_DEBUG_ASSERT(head != nullptr);
273   TracedBuffer* elem = *head;
274   while (elem != nullptr) {
275     timestamps_callback(elem->arg_, &(elem->ts_), shutdown_err);
276     auto* next = elem->next_;
277     delete elem;
278     elem = next;
279   }
280   *head = nullptr;
281   if (remaining != nullptr) {
282     timestamps_callback(remaining, nullptr, shutdown_err);
283   }
284   GRPC_ERROR_UNREF(shutdown_err);
285 }
286 
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,grpc_core::Timestamps *,grpc_error * error))287 void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*,
288                                                        grpc_core::Timestamps*,
289                                                        grpc_error* error)) {
290   timestamps_callback = fn;
291 }
292 } /* namespace grpc_core */
293 
294 #else /* GRPC_LINUX_ERRQUEUE */
295 
296 namespace grpc_core {
grpc_tcp_set_write_timestamps_callback(void (* fn)(void *,grpc_core::Timestamps *,grpc_error * error))297 void grpc_tcp_set_write_timestamps_callback(void (*fn)(void*,
298                                                        grpc_core::Timestamps*,
299                                                        grpc_error* error)) {
300   // Cast value of fn to void to avoid unused parameter warning.
301   // Can't comment out the name because some compilers and formatters don't
302   // like the sequence */* , which would arise from */*fn*/.
303   (void)fn;
304   gpr_log(GPR_DEBUG, "Timestamps callback is not enabled for this platform");
305 }
306 } /* namespace grpc_core */
307 
308 #endif /* GRPC_LINUX_ERRQUEUE */
309