1 /*
2   chronyd/chronyc - Programs for keeping computer clocks accurate.
3 
4  **********************************************************************
5  * Copyright (C) Miroslav Lichvar  2016-2019
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of version 2 of the GNU General Public License as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19  *
20  **********************************************************************
21 
22   =======================================================================
23 
24   Functions for NTP I/O specific to Linux
25   */
26 
27 #include "config.h"
28 
29 #include "sysincl.h"
30 
31 #include <ifaddrs.h>
32 #include <linux/ethtool.h>
33 #include <linux/net_tstamp.h>
34 #include <linux/sockios.h>
35 #include <net/if.h>
36 
37 #include "array.h"
38 #include "conf.h"
39 #include "hwclock.h"
40 #include "local.h"
41 #include "logging.h"
42 #include "ntp_core.h"
43 #include "ntp_io.h"
44 #include "ntp_io_linux.h"
45 #include "ntp_sources.h"
46 #include "sched.h"
47 #include "socket.h"
48 #include "sys_linux.h"
49 #include "util.h"
50 
51 struct Interface {
52   char name[IF_NAMESIZE];
53   int if_index;
54   int phc_fd;
55   int phc_mode;
56   int phc_nocrossts;
57   /* Link speed in mbit/s */
58   int link_speed;
59   /* Start of UDP data at layer 2 for IPv4 and IPv6 */
60   int l2_udp4_ntp_start;
61   int l2_udp6_ntp_start;
62   /* Precision of PHC readings */
63   double precision;
64   /* Compensation of errors in TX and RX timestamping */
65   double tx_comp;
66   double rx_comp;
67   HCL_Instance clock;
68 };
69 
70 /* Number of PHC readings per HW clock sample */
71 #define PHC_READINGS 10
72 
73 /* Minimum interval between PHC readings */
74 #define MIN_PHC_POLL -6
75 
76 /* Maximum acceptable offset between SW/HW and daemon timestamp */
77 #define MAX_TS_DELAY 1.0
78 
79 /* Array of Interfaces */
80 static ARR_Instance interfaces;
81 
82 /* RX/TX and TX-specific timestamping socket options */
83 static int ts_flags;
84 static int ts_tx_flags;
85 
86 /* Flag indicating the socket options can't be changed in control messages */
87 static int permanent_ts_options;
88 
89 /* When sending client requests to a close and fast server, it is possible that
90    a response will be received before the HW transmit timestamp of the request
91    itself.  To avoid processing of the response without the HW timestamp, we
92    monitor events returned by select() and suspend reading of packets from the
93    receive queue for up to 200 microseconds.  As the requests are normally
94    separated by at least 200 milliseconds, it is sufficient to monitor and
95    suspend one socket at a time. */
96 static int monitored_socket;
97 static int suspended_socket;
98 static SCH_TimeoutID resume_timeout_id;
99 
100 #define RESUME_TIMEOUT 200.0e-6
101 
102 /* Unbound socket keeping the kernel RX timestamping permanently enabled
103    in order to avoid a race condition between receiving a server response
104    and the kernel actually starting to timestamp received packets after
105    enabling the timestamping and sending a request */
106 static int dummy_rxts_socket;
107 
108 #define INVALID_SOCK_FD -3
109 
110 /* ================================================== */
111 
112 static int
add_interface(CNF_HwTsInterface * conf_iface)113 add_interface(CNF_HwTsInterface *conf_iface)
114 {
115   struct ethtool_ts_info ts_info;
116   struct hwtstamp_config ts_config;
117   struct ifreq req;
118   int sock_fd, if_index, phc_fd, req_hwts_flags, rx_filter;
119   unsigned int i;
120   struct Interface *iface;
121 
122   /* Check if the interface was not already added */
123   for (i = 0; i < ARR_GetSize(interfaces); i++) {
124     if (!strcmp(conf_iface->name, ((struct Interface *)ARR_GetElement(interfaces, i))->name))
125       return 1;
126   }
127 
128   sock_fd = SCK_OpenUdpSocket(NULL, NULL, NULL, 0);
129   if (sock_fd < 0)
130     return 0;
131 
132   memset(&req, 0, sizeof (req));
133   memset(&ts_info, 0, sizeof (ts_info));
134 
135   if (snprintf(req.ifr_name, sizeof (req.ifr_name), "%s", conf_iface->name) >=
136       sizeof (req.ifr_name)) {
137     SCK_CloseSocket(sock_fd);
138     return 0;
139   }
140 
141   if (ioctl(sock_fd, SIOCGIFINDEX, &req)) {
142     DEBUG_LOG("ioctl(%s) failed : %s", "SIOCGIFINDEX", strerror(errno));
143     SCK_CloseSocket(sock_fd);
144     return 0;
145   }
146 
147   if_index = req.ifr_ifindex;
148 
149   ts_info.cmd = ETHTOOL_GET_TS_INFO;
150   req.ifr_data = (char *)&ts_info;
151 
152   if (ioctl(sock_fd, SIOCETHTOOL, &req)) {
153     DEBUG_LOG("ioctl(%s) failed : %s", "SIOCETHTOOL", strerror(errno));
154     SCK_CloseSocket(sock_fd);
155     return 0;
156   }
157 
158   req_hwts_flags = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_TX_HARDWARE |
159                    SOF_TIMESTAMPING_RAW_HARDWARE;
160   if ((ts_info.so_timestamping & req_hwts_flags) != req_hwts_flags) {
161     DEBUG_LOG("HW timestamping not supported on %s", req.ifr_name);
162     SCK_CloseSocket(sock_fd);
163     return 0;
164   }
165 
166   if (ts_info.phc_index < 0) {
167     DEBUG_LOG("PHC missing on %s", req.ifr_name);
168     SCK_CloseSocket(sock_fd);
169     return 0;
170   }
171 
172   switch (conf_iface->rxfilter) {
173     case CNF_HWTS_RXFILTER_ANY:
174 #ifdef HAVE_LINUX_TIMESTAMPING_RXFILTER_NTP
175       if (ts_info.rx_filters & (1 << HWTSTAMP_FILTER_NTP_ALL))
176         rx_filter = HWTSTAMP_FILTER_NTP_ALL;
177       else
178 #endif
179       if (ts_info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))
180         rx_filter = HWTSTAMP_FILTER_ALL;
181       else
182         rx_filter = HWTSTAMP_FILTER_NONE;
183       break;
184     case CNF_HWTS_RXFILTER_NONE:
185       rx_filter = HWTSTAMP_FILTER_NONE;
186       break;
187 #ifdef HAVE_LINUX_TIMESTAMPING_RXFILTER_NTP
188     case CNF_HWTS_RXFILTER_NTP:
189       rx_filter = HWTSTAMP_FILTER_NTP_ALL;
190       break;
191 #endif
192     case CNF_HWTS_RXFILTER_PTP:
193       if (ts_info.rx_filters & (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT))
194         rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
195       else if (ts_info.rx_filters & (1 << HWTSTAMP_FILTER_PTP_V2_EVENT))
196         rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
197       else
198         rx_filter = HWTSTAMP_FILTER_NONE;
199       break;
200     default:
201       rx_filter = HWTSTAMP_FILTER_ALL;
202       break;
203   }
204 
205   ts_config.flags = 0;
206   ts_config.tx_type = HWTSTAMP_TX_ON;
207   ts_config.rx_filter = rx_filter;
208   req.ifr_data = (char *)&ts_config;
209 
210   if (ioctl(sock_fd, SIOCSHWTSTAMP, &req)) {
211     LOG(errno == EPERM ? LOGS_ERR : LOGS_DEBUG,
212         "ioctl(%s) failed : %s", "SIOCSHWTSTAMP", strerror(errno));
213 
214     /* Check the current timestamping configuration in case this interface
215        allows only reading of the configuration and it was already configured
216        as requested */
217     req.ifr_data = (char *)&ts_config;
218 #ifdef SIOCGHWTSTAMP
219     if (ioctl(sock_fd, SIOCGHWTSTAMP, &req) ||
220         ts_config.tx_type != HWTSTAMP_TX_ON || ts_config.rx_filter != rx_filter)
221 #endif
222     {
223       SCK_CloseSocket(sock_fd);
224       return 0;
225     }
226   }
227 
228   SCK_CloseSocket(sock_fd);
229 
230   phc_fd = SYS_Linux_OpenPHC(NULL, ts_info.phc_index);
231   if (phc_fd < 0)
232     return 0;
233 
234   iface = ARR_GetNewElement(interfaces);
235 
236   snprintf(iface->name, sizeof (iface->name), "%s", conf_iface->name);
237   iface->if_index = if_index;
238   iface->phc_fd = phc_fd;
239   iface->phc_mode = 0;
240   iface->phc_nocrossts = conf_iface->nocrossts;
241 
242   /* Start with 1 gbit and no VLANs or IPv4/IPv6 options */
243   iface->link_speed = 1000;
244   iface->l2_udp4_ntp_start = 42;
245   iface->l2_udp6_ntp_start = 62;
246 
247   iface->precision = conf_iface->precision;
248   iface->tx_comp = conf_iface->tx_comp;
249   iface->rx_comp = conf_iface->rx_comp;
250 
251   iface->clock = HCL_CreateInstance(conf_iface->min_samples, conf_iface->max_samples,
252                                     UTI_Log2ToDouble(MAX(conf_iface->minpoll, MIN_PHC_POLL)));
253 
254   LOG(LOGS_INFO, "Enabled HW timestamping %son %s",
255       ts_config.rx_filter == HWTSTAMP_FILTER_NONE ? "(TX only) " : "", iface->name);
256 
257   return 1;
258 }
259 
260 /* ================================================== */
261 
262 static int
add_all_interfaces(CNF_HwTsInterface * conf_iface_all)263 add_all_interfaces(CNF_HwTsInterface *conf_iface_all)
264 {
265   CNF_HwTsInterface conf_iface;
266   struct ifaddrs *ifaddr, *ifa;
267   int r;
268 
269   conf_iface = *conf_iface_all;
270 
271   if (getifaddrs(&ifaddr)) {
272     DEBUG_LOG("getifaddrs() failed : %s", strerror(errno));
273     return 0;
274   }
275 
276   for (r = 0, ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
277     conf_iface.name = ifa->ifa_name;
278     if (add_interface(&conf_iface))
279       r = 1;
280   }
281 
282   freeifaddrs(ifaddr);
283 
284   /* Return success if at least one interface was added */
285   return r;
286 }
287 
288 /* ================================================== */
289 
290 static void
update_interface_speed(struct Interface * iface)291 update_interface_speed(struct Interface *iface)
292 {
293   struct ethtool_cmd cmd;
294   struct ifreq req;
295   int sock_fd, link_speed;
296 
297   sock_fd = SCK_OpenUdpSocket(NULL, NULL, NULL, 0);
298   if (sock_fd < 0)
299     return;
300 
301   memset(&req, 0, sizeof (req));
302   memset(&cmd, 0, sizeof (cmd));
303 
304   snprintf(req.ifr_name, sizeof (req.ifr_name), "%s", iface->name);
305   cmd.cmd = ETHTOOL_GSET;
306   req.ifr_data = (char *)&cmd;
307 
308   if (ioctl(sock_fd, SIOCETHTOOL, &req)) {
309     DEBUG_LOG("ioctl(%s) failed : %s", "SIOCETHTOOL", strerror(errno));
310     SCK_CloseSocket(sock_fd);
311     return;
312   }
313 
314   SCK_CloseSocket(sock_fd);
315 
316   link_speed = ethtool_cmd_speed(&cmd);
317 
318   if (iface->link_speed != link_speed) {
319     iface->link_speed = link_speed;
320     DEBUG_LOG("Updated speed of %s to %d Mb/s", iface->name, link_speed);
321   }
322 }
323 
324 /* ================================================== */
325 
#if defined(HAVE_LINUX_TIMESTAMPING_OPT_PKTINFO) || defined(HAVE_LINUX_TIMESTAMPING_OPT_TX_SWHW)
/* Check on a temporary UDP socket whether the SO_TIMESTAMPING option can be
   set to the specified value.  Returns 1 if it can, 0 if not or if the
   socket could not be opened. */
static int
check_timestamping_option(int option)
{
  int sock_fd, supported;

  sock_fd = SCK_OpenUdpSocket(NULL, NULL, NULL, 0);
  if (sock_fd < 0)
    return 0;

  supported = SCK_SetIntOption(sock_fd, SOL_SOCKET, SO_TIMESTAMPING, option) ? 1 : 0;

  SCK_CloseSocket(sock_fd);

  return supported;
}
#endif
345 
346 /* ================================================== */
347 
348 static int
open_dummy_socket(void)349 open_dummy_socket(void)
350 {
351   int sock_fd, events = 0;
352 
353   sock_fd = SCK_OpenUdpSocket(NULL, NULL, NULL, 0);
354   if (sock_fd < 0)
355     return INVALID_SOCK_FD;
356 
357   if (!NIO_Linux_SetTimestampSocketOptions(sock_fd, 1, &events)) {
358     SCK_CloseSocket(sock_fd);
359     return INVALID_SOCK_FD;
360   }
361 
362   return sock_fd;
363 }
364 
365 /* ================================================== */
366 
367 void
NIO_Linux_Initialise(void)368 NIO_Linux_Initialise(void)
369 {
370   CNF_HwTsInterface *conf_iface;
371   unsigned int i;
372   int hwts;
373 
374   interfaces = ARR_CreateInstance(sizeof (struct Interface));
375 
376   /* Enable HW timestamping on specified interfaces.  If "*" was specified, try
377      all interfaces.  If no interface was specified, enable SW timestamping. */
378 
379   for (i = hwts = 0; CNF_GetHwTsInterface(i, &conf_iface); i++) {
380     if (!strcmp("*", conf_iface->name))
381       continue;
382     if (!add_interface(conf_iface))
383       LOG_FATAL("Could not enable HW timestamping on %s", conf_iface->name);
384     hwts = 1;
385   }
386 
387   for (i = 0; CNF_GetHwTsInterface(i, &conf_iface); i++) {
388     if (strcmp("*", conf_iface->name))
389       continue;
390     if (add_all_interfaces(conf_iface))
391       hwts = 1;
392     break;
393   }
394 
395   ts_flags = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE;
396   ts_tx_flags = SOF_TIMESTAMPING_TX_SOFTWARE;
397 
398   if (hwts) {
399     ts_flags |= SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE;
400     ts_tx_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
401 #ifdef HAVE_LINUX_TIMESTAMPING_OPT_PKTINFO
402     if (check_timestamping_option(SOF_TIMESTAMPING_OPT_PKTINFO))
403       ts_flags |= SOF_TIMESTAMPING_OPT_PKTINFO;
404 #endif
405 #ifdef HAVE_LINUX_TIMESTAMPING_OPT_TX_SWHW
406     if (check_timestamping_option(SOF_TIMESTAMPING_OPT_TX_SWHW))
407       ts_flags |= SOF_TIMESTAMPING_OPT_TX_SWHW;
408 #endif
409   }
410 
411   /* Enable IP_PKTINFO in messages looped back to the error queue */
412   ts_flags |= SOF_TIMESTAMPING_OPT_CMSG;
413 
414   /* Kernels before 4.7 ignore timestamping flags set in control messages */
415   permanent_ts_options = !SYS_Linux_CheckKernelVersion(4, 7);
416 
417   monitored_socket = INVALID_SOCK_FD;
418   suspended_socket = INVALID_SOCK_FD;
419   dummy_rxts_socket = INVALID_SOCK_FD;
420 }
421 
422 /* ================================================== */
423 
424 void
NIO_Linux_Finalise(void)425 NIO_Linux_Finalise(void)
426 {
427   struct Interface *iface;
428   unsigned int i;
429 
430   if (dummy_rxts_socket != INVALID_SOCK_FD)
431     SCK_CloseSocket(dummy_rxts_socket);
432 
433   for (i = 0; i < ARR_GetSize(interfaces); i++) {
434     iface = ARR_GetElement(interfaces, i);
435     HCL_DestroyInstance(iface->clock);
436     close(iface->phc_fd);
437   }
438 
439   ARR_DestroyInstance(interfaces);
440 }
441 
442 /* ================================================== */
443 
444 int
NIO_Linux_SetTimestampSocketOptions(int sock_fd,int client_only,int * events)445 NIO_Linux_SetTimestampSocketOptions(int sock_fd, int client_only, int *events)
446 {
447   int val, flags;
448 
449   if (!ts_flags)
450     return 0;
451 
452   /* Enable SCM_TIMESTAMPING control messages and the socket's error queue in
453      order to receive our transmitted packets with more accurate timestamps */
454 
455   val = 1;
456   flags = ts_flags;
457 
458   if (client_only || permanent_ts_options)
459     flags |= ts_tx_flags;
460 
461   if (!SCK_SetIntOption(sock_fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE, val)) {
462     ts_flags = 0;
463     return 0;
464   }
465 
466   if (!SCK_SetIntOption(sock_fd, SOL_SOCKET, SO_TIMESTAMPING, flags)) {
467     ts_flags = 0;
468     return 0;
469   }
470 
471   *events |= SCH_FILE_EXCEPTION;
472   return 1;
473 }
474 
475 /* ================================================== */
476 
477 static void
resume_socket(int sock_fd)478 resume_socket(int sock_fd)
479 {
480   if (monitored_socket == sock_fd)
481     monitored_socket = INVALID_SOCK_FD;
482 
483   if (sock_fd == INVALID_SOCK_FD || sock_fd != suspended_socket)
484     return;
485 
486   suspended_socket = INVALID_SOCK_FD;
487 
488   SCH_SetFileHandlerEvent(sock_fd, SCH_FILE_INPUT, 1);
489 
490   DEBUG_LOG("Resumed RX processing %s timeout fd=%d",
491             resume_timeout_id ? "before" : "on", sock_fd);
492 
493   if (resume_timeout_id) {
494     SCH_RemoveTimeout(resume_timeout_id);
495     resume_timeout_id = 0;
496   }
497 }
498 
499 /* ================================================== */
500 
501 static void
resume_timeout(void * arg)502 resume_timeout(void *arg)
503 {
504   resume_timeout_id = 0;
505   resume_socket(suspended_socket);
506 }
507 
508 /* ================================================== */
509 
510 static void
suspend_socket(int sock_fd)511 suspend_socket(int sock_fd)
512 {
513   resume_socket(suspended_socket);
514 
515   suspended_socket = sock_fd;
516 
517   SCH_SetFileHandlerEvent(suspended_socket, SCH_FILE_INPUT, 0);
518   resume_timeout_id = SCH_AddTimeoutByDelay(RESUME_TIMEOUT, resume_timeout, NULL);
519 
520   DEBUG_LOG("Suspended RX processing fd=%d", sock_fd);
521 }
522 
523 /* ================================================== */
524 
525 int
NIO_Linux_ProcessEvent(int sock_fd,int event)526 NIO_Linux_ProcessEvent(int sock_fd, int event)
527 {
528   if (sock_fd != monitored_socket)
529     return 0;
530 
531   if (event == SCH_FILE_INPUT) {
532     suspend_socket(monitored_socket);
533     monitored_socket = INVALID_SOCK_FD;
534 
535     /* Don't process the message yet */
536     return 1;
537   }
538 
539   return 0;
540 }
541 
542 /* ================================================== */
543 
544 static struct Interface *
get_interface(int if_index)545 get_interface(int if_index)
546 {
547   struct Interface *iface;
548   unsigned int i;
549 
550   for (i = 0; i < ARR_GetSize(interfaces); i++) {
551     iface = ARR_GetElement(interfaces, i);
552     if (iface->if_index != if_index)
553       continue;
554 
555     return iface;
556   }
557 
558   return NULL;
559 }
560 
561 /* ================================================== */
562 
563 static void
process_hw_timestamp(struct Interface * iface,struct timespec * hw_ts,NTP_Local_Timestamp * local_ts,int rx_ntp_length,int family,int l2_length)564 process_hw_timestamp(struct Interface *iface, struct timespec *hw_ts,
565                      NTP_Local_Timestamp *local_ts, int rx_ntp_length, int family,
566                      int l2_length)
567 {
568   struct timespec sample_phc_ts, sample_sys_ts, sample_local_ts, ts;
569   double rx_correction, ts_delay, phc_err, local_err;
570 
571   if (HCL_NeedsNewSample(iface->clock, &local_ts->ts)) {
572     if (!SYS_Linux_GetPHCSample(iface->phc_fd, iface->phc_nocrossts, iface->precision,
573                                 &iface->phc_mode, &sample_phc_ts, &sample_sys_ts,
574                                 &phc_err))
575       return;
576 
577     LCL_CookTime(&sample_sys_ts, &sample_local_ts, &local_err);
578     HCL_AccumulateSample(iface->clock, &sample_phc_ts, &sample_local_ts,
579                          phc_err + local_err);
580 
581     update_interface_speed(iface);
582   }
583 
584   /* We need to transpose RX timestamps as hardware timestamps are normally
585      preamble timestamps and RX timestamps in NTP are supposed to be trailer
586      timestamps.  If we don't know the length of the packet at layer 2, we
587      make an assumption that UDP data start at the same position as in the
588      last transmitted packet which had a HW TX timestamp. */
589   if (rx_ntp_length && iface->link_speed) {
590     if (!l2_length)
591       l2_length = (family == IPADDR_INET4 ? iface->l2_udp4_ntp_start :
592                    iface->l2_udp6_ntp_start) + rx_ntp_length;
593 
594     /* Include the frame check sequence (FCS) */
595     l2_length += 4;
596 
597     rx_correction = l2_length / (1.0e6 / 8 * iface->link_speed);
598 
599     UTI_AddDoubleToTimespec(hw_ts, rx_correction, hw_ts);
600   }
601 
602   if (!HCL_CookTime(iface->clock, hw_ts, &ts, &local_err))
603     return;
604 
605   if (!rx_ntp_length && iface->tx_comp)
606     UTI_AddDoubleToTimespec(&ts, iface->tx_comp, &ts);
607   else if (rx_ntp_length && iface->rx_comp)
608     UTI_AddDoubleToTimespec(&ts, -iface->rx_comp, &ts);
609 
610   ts_delay = UTI_DiffTimespecsToDouble(&local_ts->ts, &ts);
611 
612   if (fabs(ts_delay) > MAX_TS_DELAY) {
613     DEBUG_LOG("Unacceptable timestamp delay %.9f", ts_delay);
614     return;
615   }
616 
617   local_ts->ts = ts;
618   local_ts->err = local_err;
619   local_ts->source = NTP_TS_HARDWARE;
620 }
621 
622 /* ================================================== */
623 
624 static void
process_sw_timestamp(struct timespec * sw_ts,NTP_Local_Timestamp * local_ts)625 process_sw_timestamp(struct timespec *sw_ts, NTP_Local_Timestamp *local_ts)
626 {
627   double ts_delay, local_err;
628   struct timespec ts;
629 
630   LCL_CookTime(sw_ts, &ts, &local_err);
631 
632   ts_delay = UTI_DiffTimespecsToDouble(&local_ts->ts, &ts);
633 
634   if (fabs(ts_delay) > MAX_TS_DELAY) {
635     DEBUG_LOG("Unacceptable timestamp delay %.9f", ts_delay);
636     return;
637   }
638 
639   local_ts->ts = ts;
640   local_ts->err = local_err;
641   local_ts->source = NTP_TS_KERNEL;
642 }
643 
644 /* ================================================== */
645 /* Extract UDP data from a layer 2 message.  Supported is Ethernet
646    with optional VLAN tags. */
647 
648 static int
extract_udp_data(unsigned char * msg,NTP_Remote_Address * remote_addr,int len)649 extract_udp_data(unsigned char *msg, NTP_Remote_Address *remote_addr, int len)
650 {
651   unsigned char *msg_start = msg;
652 
653   remote_addr->ip_addr.family = IPADDR_UNSPEC;
654   remote_addr->port = 0;
655 
656   /* Skip MACs */
657   if (len < 12)
658     return 0;
659   len -= 12, msg += 12;
660 
661   /* Skip VLAN tag(s) if present */
662   while (len >= 4 && msg[0] == 0x81 && msg[1] == 0x00)
663     len -= 4, msg += 4;
664 
665   /* Skip IPv4 or IPv6 ethertype */
666   if (len < 2 || !((msg[0] == 0x08 && msg[1] == 0x00) ||
667                    (msg[0] == 0x86 && msg[1] == 0xdd)))
668     return 0;
669   len -= 2, msg += 2;
670 
671   /* Parse destination address and port from IPv4/IPv6 and UDP headers */
672   if (len >= 20 && msg[0] >> 4 == 4) {
673     int ihl = (msg[0] & 0xf) * 4;
674     uint32_t addr;
675 
676     if (len < ihl + 8 || msg[9] != 17)
677       return 0;
678 
679     memcpy(&addr, msg + 16, sizeof (addr));
680     remote_addr->ip_addr.addr.in4 = ntohl(addr);
681     remote_addr->port = ntohs(*(uint16_t *)(msg + ihl + 2));
682     remote_addr->ip_addr.family = IPADDR_INET4;
683     len -= ihl + 8, msg += ihl + 8;
684 #ifdef FEAT_IPV6
685   } else if (len >= 48 && msg[0] >> 4 == 6) {
686     int eh_len, next_header = msg[6];
687 
688     memcpy(&remote_addr->ip_addr.addr.in6, msg + 24, sizeof (remote_addr->ip_addr.addr.in6));
689     len -= 40, msg += 40;
690 
691     /* Skip IPv6 extension headers if present */
692     while (next_header != 17) {
693       switch (next_header) {
694         case 44:  /* Fragment Header */
695           /* Process only the first fragment */
696           if (ntohs(*(uint16_t *)(msg + 2)) >> 3 != 0)
697             return 0;
698           eh_len = 8;
699           break;
700         case 0:   /* Hop-by-Hop Options */
701         case 43:  /* Routing Header */
702         case 60:  /* Destination Options */
703         case 135: /* Mobility Header */
704           eh_len = 8 * (msg[1] + 1);
705           break;
706         case 51:  /* Authentication Header */
707           eh_len = 4 * (msg[1] + 2);
708           break;
709         default:
710           return 0;
711       }
712 
713       if (eh_len < 8 || len < eh_len + 8)
714         return 0;
715 
716       next_header = msg[0];
717       len -= eh_len, msg += eh_len;
718     }
719 
720     remote_addr->port = ntohs(*(uint16_t *)(msg + 2));
721     remote_addr->ip_addr.family = IPADDR_INET6;
722     len -= 8, msg += 8;
723 #endif
724   } else {
725     return 0;
726   }
727 
728   /* Move the message to fix alignment of its fields */
729   if (len > 0)
730     memmove(msg_start, msg, len);
731 
732   return len;
733 }
734 
735 /* ================================================== */
736 
737 int
NIO_Linux_ProcessMessage(SCK_Message * message,NTP_Local_Address * local_addr,NTP_Local_Timestamp * local_ts,int event)738 NIO_Linux_ProcessMessage(SCK_Message *message, NTP_Local_Address *local_addr,
739                          NTP_Local_Timestamp *local_ts, int event)
740 {
741   struct Interface *iface;
742   int is_tx, ts_if_index, l2_length;
743 
744   is_tx = event == SCH_FILE_EXCEPTION;
745   iface = NULL;
746 
747   ts_if_index = message->timestamp.if_index;
748   if (ts_if_index == INVALID_IF_INDEX)
749     ts_if_index = message->if_index;
750   l2_length = message->timestamp.l2_length;
751 
752   if (!UTI_IsZeroTimespec(&message->timestamp.hw)) {
753     iface = get_interface(ts_if_index);
754     if (iface) {
755       process_hw_timestamp(iface, &message->timestamp.hw, local_ts, !is_tx ? message->length : 0,
756                            message->remote_addr.ip.ip_addr.family, l2_length);
757     } else {
758       DEBUG_LOG("HW clock not found for interface %d", ts_if_index);
759     }
760 
761     /* If a HW transmit timestamp was received, resume processing
762        of non-error messages on this socket */
763     if (is_tx)
764       resume_socket(local_addr->sock_fd);
765   }
766 
767   if (local_ts->source == NTP_TS_DAEMON && !UTI_IsZeroTimespec(&message->timestamp.kernel) &&
768       (!is_tx || UTI_IsZeroTimespec(&message->timestamp.hw))) {
769     process_sw_timestamp(&message->timestamp.kernel, local_ts);
770   }
771 
772   /* If the kernel is slow with enabling RX timestamping, open a dummy
773      socket to keep the kernel RX timestamping permanently enabled */
774   if (!is_tx && local_ts->source == NTP_TS_DAEMON && ts_flags) {
775     DEBUG_LOG("Missing kernel RX timestamp");
776     if (dummy_rxts_socket == INVALID_SOCK_FD)
777       dummy_rxts_socket = open_dummy_socket();
778   }
779 
780   /* Return the message if it's not received from the error queue */
781   if (!is_tx)
782     return 0;
783 
784   /* The data from the error queue includes all layers up to UDP.  We have to
785      extract the UDP data and also the destination address with port as there
786      currently doesn't seem to be a better way to get them both. */
787   l2_length = message->length;
788   message->length = extract_udp_data(message->data, &message->remote_addr.ip, message->length);
789 
790   DEBUG_LOG("Extracted message for %s fd=%d len=%d",
791             UTI_IPSockAddrToString(&message->remote_addr.ip),
792             local_addr->sock_fd, message->length);
793 
794   /* Update assumed position of UDP data at layer 2 for next received packet */
795   if (iface && message->length) {
796     if (message->remote_addr.ip.ip_addr.family == IPADDR_INET4)
797       iface->l2_udp4_ntp_start = l2_length - message->length;
798     else if (message->remote_addr.ip.ip_addr.family == IPADDR_INET6)
799       iface->l2_udp6_ntp_start = l2_length - message->length;
800   }
801 
802   /* Drop the message if it has no timestamp or its processing failed */
803   if (local_ts->source == NTP_TS_DAEMON) {
804     DEBUG_LOG("Missing TX timestamp");
805     return 1;
806   }
807 
808   if (!NIO_UnwrapMessage(message, local_addr->sock_fd))
809     return 1;
810 
811   if (message->length < NTP_HEADER_LENGTH || message->length > sizeof (NTP_Packet))
812     return 1;
813 
814   NSR_ProcessTx(&message->remote_addr.ip, local_addr, local_ts, message->data, message->length);
815 
816   return 1;
817 }
818 
819 /* ================================================== */
820 
821 void
NIO_Linux_RequestTxTimestamp(SCK_Message * message,int sock_fd)822 NIO_Linux_RequestTxTimestamp(SCK_Message *message, int sock_fd)
823 {
824   if (!ts_flags)
825     return;
826 
827   /* If a HW transmit timestamp is requested on a client socket, monitor
828      events on the socket in order to avoid processing of a fast response
829      without the HW timestamp of the request */
830   if (ts_tx_flags & SOF_TIMESTAMPING_TX_HARDWARE && !NIO_IsServerSocket(sock_fd))
831     monitored_socket = sock_fd;
832 
833   /* Check if TX timestamping is disabled on this socket */
834   if (permanent_ts_options || !NIO_IsServerSocket(sock_fd))
835     return;
836 
837   message->timestamp.tx_flags = ts_tx_flags;
838 }
839 
840 /* ================================================== */
841 
/* Notification that the socket is about to be closed.  The socket must no
   longer be monitored or suspended, so drop any such state that refers
   to it. */
void
NIO_Linux_NotifySocketClosing(int sock_fd)
{
  resume_socket(sock_fd);
}
847