1 // Copyright (C) 2018-2021 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #ifndef HA_COMMUNICATION_STATE_H
8 #define HA_COMMUNICATION_STATE_H
9 
10 #include <ha_config.h>
11 #include <ha_service_states.h>
12 #include <asiolink/interval_timer.h>
13 #include <asiolink/io_service.h>
14 #include <cc/data.h>
15 #include <dhcp/pkt.h>
16 
17 #include <boost/date_time/posix_time/posix_time.hpp>
18 #include <boost/multi_index_container.hpp>
19 #include <boost/multi_index/composite_key.hpp>
20 #include <boost/multi_index/hashed_index.hpp>
21 #include <boost/multi_index/indexed_by.hpp>
22 #include <boost/multi_index/member.hpp>
23 #include <boost/multi_index/ordered_index.hpp>
24 #include <boost/scoped_ptr.hpp>
25 #include <boost/shared_ptr.hpp>
26 
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <mutex>
#include <set>
#include <string>
#include <utility>
#include <vector>
33 
34 namespace isc {
35 namespace ha {
36 
37 /// @brief Holds communication state between the two HA peers.
38 ///
39 /// The HA service constantly monitors the state of the connection between
40 /// the two peers. If the connection is lost it is an indicator that
41 /// the partner server may be down and failover actions should be triggered.
42 ///
43 /// A heartbeat command successfully sent over the control channel is an
44 /// indicator that the connection is healthy. A reply to the heartbeat
45 /// command includes information about the recipient state, its notion of
46 /// time, and other information useful for determining its health and
47 /// current activity.
48 ///
49 /// This class uses an interval timer to run heartbeat commands over the
50 /// control channel. The implementation of the heartbeat is external to
51 /// this class and is provided via @c CommunicationState::startHeartbeat
52 /// method. This implementation is required to run the @c poke method
53 /// in case of receiving a successful response to the heartbeat command.
54 ///
55 /// The @c poke method sets the "last poke time" to current time, thus
56 /// indicating that the connection is healthy. The @c getDurationInMillisecs
57 /// method is used to check for how long the server hasn't been able
58 /// to communicate with the partner. This duration is simply a time
59 /// elapsed since last successful poke time. If this duration becomes
60 /// greater than the configured threshold, the server assumes that the
61 /// communication with the partner is interrupted.
62 ///
63 /// The derivations of this class provide DHCPv4 and DHCPv6 specific
64 /// mechanisms for detecting server failures based on the analysis of
65 /// the received DHCP messages, i.e. how long the clients have been
66 /// trying to communicate with the partner and message types they sent.
67 /// In particular, the increased number of Rebind messages may indicate
68 /// issues with the DHCP server.
69 ///
70 /// This class is also used to monitor the clock skew between the active
71 /// servers. Maintaining a reasonably low clock skew is essential for the
72 /// HA service to function properly. This class calculates the clock
73 /// skew by comparing local time of the server with the time returned by
/// the partner in response to a heartbeat command. If this value exceeds
/// certain thresholds, the @c CommunicationState::clockSkewShouldWarn
/// and the @c CommunicationState::clockSkewShouldTerminate indicate
77 /// whether the HA service should continue to operate normally, should
78 /// start issuing a warning about high clock skew or simply enter the
79 /// "terminated" state refusing to further operate until the clocks
80 /// are synchronized. This requires administrative intervention and the
81 /// restart of the HA service.
82 class CommunicationState {
83 public:
84 
85     /// @brief Constructor.
86     ///
87     /// @param io_service pointer to the common IO service instance.
88     /// @param config pointer to the HA configuration.
89     CommunicationState(const asiolink::IOServicePtr& io_service,
90                        const HAConfigPtr& config);
91 
92     /// @brief Destructor.
93     ///
94     /// Stops scheduled heartbeat.
95     virtual ~CommunicationState();
96 
97     /// @brief Returns last known state of the partner.
98     ///
99     /// @return Partner's state if it is known, or a negative value otherwise.
100     int getPartnerState() const;
101 
102     /// @brief Sets partner state.
103     ///
104     /// @param state new partner's state in a textual form. Supported values are
105     /// those returned in response to a ha-heartbeat command.
106     /// @throw BadValue if unsupported state value was provided.
107     void setPartnerState(const std::string& state);
108 
109 private:
110     /// @brief Sets partner state.
111     ///
112     /// @param state new partner's state in a textual form. Supported values are
113     /// those returned in response to a ha-heartbeat command.
114     /// @throw BadValue if unsupported state value was provided.
115     void setPartnerStateInternal(const std::string& state);
116 
117 public:
118     /// @brief Returns scopes served by the partner server.
119     ///
120     /// @return A set of scopes served by the partner.
121     std::set<std::string> getPartnerScopes() const;
122 
123     /// @brief Sets partner scopes.
124     ///
125     /// @param new_scopes Partner scopes enclosed in a JSON list.
126     void setPartnerScopes(data::ConstElementPtr new_scopes);
127 
128 private:
129     /// @brief Sets partner scopes.
130     ///
131     /// @param new_scopes Partner scopes enclosed in a JSON list.
132     void setPartnerScopesInternal(data::ConstElementPtr new_scopes);
133 
134 public:
135     /// @brief Starts recurring heartbeat (public interface).
136     ///
137     /// @param interval heartbeat interval in milliseconds.
138     /// @param heartbeat_impl pointer to the heartbeat implementation
139     /// function.
140     void startHeartbeat(const long interval,
141                         const std::function<void()>& heartbeat_impl);
142 
143     /// @brief Stops recurring heartbeat.
144     void stopHeartbeat();
145 
146 private:
147     /// @brief Starts recurring heartbeat.
148     ///
149     /// @param interval heartbeat interval in milliseconds.
150     /// @param heartbeat_impl pointer to the heartbeat implementation
151     /// function.
152     void startHeartbeatInternal(const long interval = 0,
153                                 const std::function<void()>& heartbeat_impl = 0);
154 
155     /// @brief Stops recurring heartbeat.
156     void stopHeartbeatInternal();
157 
158 public:
159     /// @brief Checks if recurring heartbeat is running.
160     ///
161     /// @return true if heartbeat is running, false otherwise.
162     bool isHeartbeatRunning() const;
163 
164     /// @brief Pokes the communication state.
165     ///
166     /// Sets the last poke time to current time. If the heartbeat timer
167     /// has been scheduled, it is reset (starts over measuring the time
168     /// to the next heartbeat).
169     void poke();
170 
171 private:
172     /// @brief Pokes the communication state.
173     ///
174     /// Sets the last poke time to current time. If the heartbeat timer
175     /// has been scheduled, it is reset (starts over measuring the time
176     /// to the next heartbeat).
177     void pokeInternal();
178 
179 public:
180     /// @brief Returns duration between the poke time and current time.
181     ///
182     /// @return Duration between the poke time and current time.
183     int64_t getDurationInMillisecs() const;
184 
185     /// @brief Checks if communication with the partner is interrupted.
186     ///
187     /// This method checks if the communication with the partner appears
188     /// to be interrupted. This is the case when the time since last
189     /// successful communication is longer than the configured
190     /// max-response-delay value.
191     ///
192     /// @return true if communication is interrupted, false otherwise.
193     bool isCommunicationInterrupted() const;
194 
195     /// @brief Checks if the DHCP message appears to be unanswered.
196     ///
197     /// This method is used to provide the communication state with a
198     /// received DHCP message directed to the HA partner, to detect
199     /// if the partner fails to answer DHCP messages directed to it.
200     /// The DHCPv4 and DHCPv6 specific derivations implement this
201     /// functionality.
202     ///
203     /// This check is orthogonal to the heartbeat mechanism and is
204     /// usually triggered after several consecutive heartbeats fail
205     /// to be responded.
206     ///
207     /// The general approach to server failure detection is based on the
208     /// analysis of the "secs" field value (DHCPv4) and "elapsed time"
209     /// option value (DHCPv6). They indicate for how long the client
210     /// has been trying to complete the DHCP transaction. If these
211     /// values exceed a configured threshold, the client is considered
212     /// to fail to communicate with the server. This fact is recorded
213     /// by this object. If the number of distinct clients failing to
214     /// communicate with the partner exceeds a configured maximum
215     /// value, this server considers the partner to be offline. In this
216     /// case, this server will most likely start serving clients
217     /// which would normally be served by the partner.
218     ///
219     /// All information gathered by this method is cleared when the
220     /// @c poke method is invoked.
221     ///
222     /// @param message DHCP message to be analyzed. This must be the
223     /// message which belongs to the partner, i.e. the caller must
224     /// filter out messages belonging to the partner prior to calling
225     /// this method.
226     virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) = 0;
227 
228     /// @brief Returns the number of analyzed messages while being in the
229     /// communications interrupted state.
230     ///
231     /// @return Number of analyzed messages. It includes retransmissions by
232     /// the same clients.
233     size_t getAnalyzedMessagesCount() const;
234 
235     /// @brief Checks if the partner failure has been detected based
236     /// on the DHCP traffic analysis.
237     ///
238     /// In the special case when max-unacked-clients is set to 0 this
239     /// method always returns true. Note that max-unacked-clients
240     /// set to 0 means that failure detection is not really performed.
241     /// Returning true in that case simplifies the code of the
242     /// @c HAService which doesn't need to check if the failure detection
243     /// is enabled or not. It simply calls this method in the
244     /// 'communications interrupted' situation to check if the
245     /// server should be transitioned to the 'partner-down' state.
246     ///
247     /// @return true if the partner failure has been detected, false
248     /// otherwise.
249     virtual bool failureDetected() const = 0;
250 
251     /// @brief Returns the current number of clients which attempted
252     /// to get a lease from the partner server.
253     ///
254     /// The returned number is reset to 0 when the server successfully
255     /// establishes communication with the partner. The number is
256     /// incremented only in the communications interrupted case.
257     ///
258     /// @return The number of clients including unacked clients.
259     virtual size_t getConnectingClientsCount() const = 0;
260 
261     /// @brief Returns the current number of clients which haven't got
262     /// the lease from the partner server.
263     ///
264     /// The returned number is reset to 0 when the server successfully
265     /// establishes communication with the partner. The number is
266     /// incremented only in the communications interrupted case.
267     ///
268     /// @return Number of unacked clients.
269     virtual size_t getUnackedClientsCount() const = 0;
270 
271 protected:
272 
273     /// @brief Removes information about the clients the partner server
274     /// should respond to while communication with the partner was
275     /// interrupted.
276     ///
277     /// This information is cleared by the @c CommunicationState::poke.
278     /// The derivations of this class must provide DHCPv4 and DHCPv6 specific
279     /// implementations of this method. The @c poke method is called to
280     /// indicate that the connection has been successfully (re)established.
281     /// Therefore the clients counters are reset and the failure detection
282     /// procedure starts over.
283     ///
284     /// See @c CommunicationState::analyzeMessage for details.
285     virtual void clearConnectingClients() = 0;
286 
287 public:
288 
289     /// @brief Issues a warning about high clock skew between the active
290     /// servers if one is warranted.
291     ///
292     /// The HA service monitors the clock skew between the active servers. The
293     /// clock skew is calculated from the local time and the time returned by
294     /// the partner in response to a heartbeat. When clock skew exceeds a certain
295     /// threshold the HA service starts issuing a warning message. This method
296     /// returns true if the HA service should issue this message.
297     ///
298     /// Currently, the warning threshold for the clock skew is hardcoded to
299     /// 30 seconds.  In the future it may become configurable.
300     ///
301     /// This method is called for each heartbeat. If we issue a warning for each
302     /// heartbeat it may flood logs with those messages. This method provides
303     /// a gating mechanism which prevents the HA service from logging the
304     /// warning more often than every 60 seconds. If the last warning was issued
305     /// less than 60 seconds ago this method will return false even if the clock
306     /// skew exceeds the 30 seconds threshold. The correction of the clock skew
307     /// will reset the gating counter.
308     ///
309     /// @return true if the warning message should be logged because of the clock
310     /// skew exceeding a warning threshold.
311     bool clockSkewShouldWarn();
312 
313 private:
314     /// @brief Issues a warning about high clock skew between the active
315     /// servers if one is warranted.
316     ///
317     /// The HA service monitors the clock skew between the active servers. The
318     /// clock skew is calculated from the local time and the time returned by
319     /// the partner in response to a heartbeat. When clock skew exceeds a certain
320     /// threshold the HA service starts issuing a warning message. This method
321     /// returns true if the HA service should issue this message.
322     ///
323     /// Currently, the warning threshold for the clock skew is hardcoded to
324     /// 30 seconds.  In the future it may become configurable.
325     ///
326     /// This method is called for each heartbeat. If we issue a warning for each
327     /// heartbeat it may flood logs with those messages. This method provides
328     /// a gating mechanism which prevents the HA service from logging the
329     /// warning more often than every 60 seconds. If the last warning was issued
330     /// less than 60 seconds ago this method will return false even if the clock
331     /// skew exceeds the 30 seconds threshold. The correction of the clock skew
332     /// will reset the gating counter.
333     ///
334     /// @return true if the warning message should be logged because of the clock
335     /// skew exceeding a warning threshold.
336     bool clockSkewShouldWarnInternal();
337 
338 public:
339     /// @brief Indicates whether the HA service should enter "terminated"
340     /// state as a result of the clock skew exceeding maximum value.
341     ///
342     /// If the clocks on the active servers are not synchronized (perhaps as
343     /// a result of a warning message caused by @c clockSkewShouldWarn) and the
344     /// clocks further drift, the clock skew may exceed another threshold which
345     /// should cause the HA service to enter "terminated" state. In this state
346     /// the servers still respond to DHCP clients normally, but they will neither
347     /// send lease updates nor heartbeats. In this case, the administrator must
348     /// correct the problem (synchronize the clocks) and restart the service.
349     /// This method indicates whether the service should terminate or not.
350     ///
351     /// Currently, the terminal threshold for the clock skew is hardcoded to
352     /// 60 seconds.  In the future it may become configurable.
353     ///
354     /// @return true if the HA service should enter "terminated" state.
355     bool clockSkewShouldTerminate() const;
356 
357 private:
358     /// @brief Indicates whether the HA service should enter "terminated"
359     /// state as a result of the clock skew exceeding maximum value.
360     ///
361     /// If the clocks on the active servers are not synchronized (perhaps as
362     /// a result of a warning message caused by @c clockSkewShouldWarn) and the
363     /// clocks further drift, the clock skew may exceed another threshold which
364     /// should cause the HA service to enter "terminated" state. In this state
365     /// the servers still respond to DHCP clients normally, but they will neither
366     /// send lease updates nor heartbeats. In this case, the administrator must
367     /// correct the problem (synchronize the clocks) and restart the service.
368     /// This method indicates whether the service should terminate or not.
369     ///
370     /// Currently, the terminal threshold for the clock skew is hardcoded to
371     /// 60 seconds.  In the future it may become configurable.
372     ///
373     /// @return true if the HA service should enter "terminated" state.
374     bool clockSkewShouldTerminateInternal() const;
375 
376     /// @brief Checks if the clock skew is greater than the specified number
377     /// of seconds.
378     ///
379     /// @param seconds a positive value to compare the clock skew with.
380     /// @return true if the absolute clock skew is greater than the specified
381     /// number of seconds, false otherwise.
382     bool isClockSkewGreater(const long seconds) const;
383 
384 public:
385 
386     /// @brief Provide partner's notion of time so the new clock skew can be
387     /// calculated.
388     ///
389     /// @param time_text Partner's time received in response to a heartbeat. The
390     /// time must be provided in the RFC 1123 format.  It stores the current
391     /// time, partner's time, and the difference (skew) between them.
392     ///
393     /// @throw isc::http::HttpTimeConversionError if the time format is invalid.
394     ///
395     /// @todo Consider some other time formats which include millisecond
396     /// precision.
397     void setPartnerTime(const std::string& time_text);
398 
399 private:
400     /// @brief Provide partner's notion of time so the new clock skew can be
401     /// calculated.
402     ///
403     /// @param time_text Partner's time received in response to a heartbeat. The
404     /// time must be provided in the RFC 1123 format.  It stores the current
405     /// time, partner's time, and the difference (skew) between them.
406     ///
407     /// @throw isc::http::HttpTimeConversionError if the time format is invalid.
408     ///
409     /// @todo Consider some other time formats which include millisecond
410     /// precision.
411     void setPartnerTimeInternal(const std::string& time_text);
412 
413 public:
414     /// @brief Returns current clock skew value in the logger friendly format.
415     std::string logFormatClockSkew() const;
416 
417 private:
418     /// @brief Returns current clock skew value in the logger friendly format.
419     std::string logFormatClockSkewInternal() const;
420 
421 public:
422     /// @brief Returns the report about current communication state.
423     ///
424     /// This function returns a JSON map describing the state of communication
425     /// with a partner. This report is included in the response to the
426     /// status-get command.
427     ///
428     /// @return JSON element holding the report.
429     data::ElementPtr getReport() const;
430 
431     /// @brief Modifies poke time by adding seconds to it.
432     ///
433     /// Used in unittests only.
434     ///
435     /// @param secs number of seconds to be added to the poke time. If
436     /// the value is negative it will set the poke time in the past
437     /// comparing to current value.
438     void modifyPokeTime(const long secs);
439 
440 private:
441 
442     /// @brief Returns duration between the poke time and current time.
443     ///
444     /// Should be called in a thread safe context.
445     ///
446     /// @return Duration between the poke time and current time.
447     int64_t getDurationInMillisecsInternal() const;
448 
449 protected:
450     /// @brief Update the poke time and compute the duration.
451     ///
452     /// @return The time elapsed.
453     boost::posix_time::time_duration updatePokeTime();
454 
455 private:
456     /// @brief Update the poke time and compute the duration.
457     ///
458     /// Should be called in a thread safe context.
459     ///
460     /// @return The time elapsed.
461     boost::posix_time::time_duration updatePokeTimeInternal();
462 
463 public:
464 
465     /// @brief Returns a total number of unsent lease updates.
466     uint64_t getUnsentUpdateCount() const;
467 
468     /// @brief Increases a total number of unsent lease updates by 1.
469     ///
470     /// This method should be called when the server has allocated a
471     /// lease but decided to not send the lease update to its partner.
472     /// If the server is in the partner-down state it allocates new
473     /// leases but doesn't send lease updates because the partner is
474     /// unavailable.
475     ///
476     /// This method protects against setting the value to 0 in an
477     /// unlikely event of the overflow. The zero is reserved for the
478     /// server startup case.
479     void increaseUnsentUpdateCount();
480 
481 private:
482 
483     /// @brief Thread unsafe implementation of the @c increaseUnsentUpdateCount.
484     void increaseUnsentUpdateCountInternal();
485 
486 public:
487 
488     /// @brief Checks if the partner allocated new leases for which it hasn't sent
489     /// any lease updates.
490     ///
491     /// It compares a previous and current value of the @c partner_unsent_update_count_.
492     /// If the current value is 0 and the previous value is non-zero it indicates
493     /// that the partner was restarted.
494     ///
495     /// @return true if the partner has allocated new leases for which it didn't
496     /// send lease updates, false otherwise.
497     bool hasPartnerNewUnsentUpdates() const;
498 
499 private:
500 
501     /// @brief Thread unsafe implementation of the @c hasPartnerNewUnsentUpdates.
502     ///
503     /// @return true if the partner has allocated new leases for which it didn't
504     /// send lease updates, false otherwise.
505     bool hasPartnerNewUnsentUpdatesInternal() const;
506 
507 public:
508 
509     /// @brief Saves new total number of unsent lease updates from the partner.
510     ///
511     /// @param unsent_updates_count new total number of unsent lease updates from
512     /// the partner.
513     void setPartnerUnsentUpdateCount(uint64_t unsent_update_count);
514 
515 private:
516 
517     /// @brief Thread unsafe implementation of the @c setPartnerUnsentUpdateCount.
518     ///
519     /// @param unsent_updates_count new total number of unsent lease updates from
520     /// the partner.
521     void setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count);
522 
523 protected:
524     /// @brief Pointer to the common IO service instance.
525     asiolink::IOServicePtr io_service_;
526 
527     /// @brief High availability configuration.
528     HAConfigPtr config_;
529 
530     /// @brief Interval timer triggering heartbeat commands.
531     asiolink::IntervalTimerPtr timer_;
532 
533     /// @brief Interval specified for the heartbeat.
534     long interval_;
535 
536     /// @brief Last poke time.
537     boost::posix_time::ptime poke_time_;
538 
539     /// @brief Pointer to the function providing heartbeat implementation.
540     std::function<void()> heartbeat_impl_;
541 
542     /// @brief Last known state of the partner server.
543     ///
544     /// Negative value means that the partner's state is unknown.
545     int partner_state_;
546 
547     /// @brief Last known set of scopes served by the partner server.
548     std::set<std::string> partner_scopes_;
549 
550     /// @brief Clock skew between the active servers.
551     boost::posix_time::time_duration clock_skew_;
552 
553     /// @brief Holds a time when last warning about too high clock skew
554     /// was issued.
555     boost::posix_time::ptime last_clock_skew_warn_;
556 
557     /// @brief My time when skew was calculated.
558     boost::posix_time::ptime my_time_at_skew_;
559 
560     /// @brief Partner reported time when skew was calculated.
561     boost::posix_time::ptime partner_time_at_skew_;
562 
563     /// @brief Total number of analyzed messages to be responded by partner.
564     size_t analyzed_messages_count_;
565 
566     /// @brief Total number of unsent lease updates.
567     ///
568     /// The lease updates are not sent when the server is in the partner
569     /// down state. The server counts the number of lease updates which
570     /// haven't been sent to the partner because the partner was unavailable.
571     /// The partner receives this value in a response to a heartbeat message
572     /// and can use it to determine if it should synchronize its lease
573     /// database.
574     uint64_t unsent_update_count_;
575 
576     /// @brief Previous and current total number of unsent lease updates
577     /// from the partner.
578     ///
579     /// This value is returned in response to a heartbeat command and saved
580     /// using the @c setPartnerUnsentUpdateCount. The previous value is
581     /// preserved so the values can be compared in the state handlers.
582     std::pair<uint64_t, uint64_t> partner_unsent_update_count_;
583 
584     /// @brief The mutex used to protect internal state.
585     const boost::scoped_ptr<std::mutex> mutex_;
586 };
587 
588 /// @brief Type of the pointer to the @c CommunicationState object.
589 typedef boost::shared_ptr<CommunicationState> CommunicationStatePtr;
590 
591 
592 /// @brief Holds communication state between DHCPv4 servers.
593 ///
594 /// This class implements DHCPv4 failure detection by monitoring the
595 /// value of the "secs" field in received DHCPv4 messages as described
596 /// in @c CommunicationState::analyzeMessage.
597 class CommunicationState4 : public CommunicationState {
598 public:
599 
600     /// @brief Constructor.
601     ///
602     /// @param io_service pointer to the common IO service instance.
603     /// @param config pointer to the HA configuration.
604     CommunicationState4(const asiolink::IOServicePtr& io_service,
605                         const HAConfigPtr& config);
606 
607     /// @brief Checks if the DHCPv4 message appears to be unanswered.
608     ///
609     /// This method uses "secs" field value for detecting client
610     /// communication failures as described in the
611     /// @c CommunicationState::analyzeMessage. Some misbehaving Windows
612     /// clients were reported to swap "secs" field bytes. In this case
613     /// the first byte is set to non-zero byte and the second byte is
614     /// set to 0. This method handles such cases and corrects bytes
615     /// order before comparing against the threshold.
616     ///
617     /// @param message DHCPv4 message to be analyzed. This must be the
618     /// message which belongs to the partner, i.e. the caller must
619     /// filter out messages belonging to the partner prior to calling
620     /// this method.
621     virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message);
622 
623     /// @brief Checks if the partner failure has been detected based
624     /// on the DHCP traffic analysis.
625     ///
626     /// @return true if the partner failure has been detected, false
627     /// otherwise.
628     virtual bool failureDetected() const;
629 
630     /// @brief Returns the current number of clients which attempted
631     /// to get a lease from the partner server.
632     ///
633     /// The returned number is reset to 0 when the server successfully
634     /// establishes communication with the partner. The number is
635     /// incremented only in the communications interrupted case.
636     ///
637     /// @return The number of clients including unacked clients.
638     virtual size_t getConnectingClientsCount() const;
639 
640     /// @brief Returns the current number of clients which haven't gotten
641     /// a lease from the partner server.
642     ///
643     /// The returned number is reset to 0 when the server successfully
644     /// establishes communication with the partner. The number is
645     /// incremented only in the communications interrupted case.
646     ///
647     /// @return Number of unacked clients.
648     virtual size_t getUnackedClientsCount() const;
649 
650 protected:
651 
652     /// @brief Checks if the DHCPv4 message appears to be unanswered.
653     ///
654     /// Should be called in a thread safe context.
655     ///
656     /// This method uses "secs" field value for detecting client
657     /// communication failures as described in the
658     /// @c CommunicationState::analyzeMessage. Some misbehaving Windows
659     /// clients were reported to swap "secs" field bytes. In this case
660     /// the first byte is set to non-zero byte and the second byte is
661     /// set to 0. This method handles such cases and corrects bytes
662     /// order before comparing against the threshold.
663     ///
664     /// @param message DHCPv4 message to be analyzed. This must be the
665     /// message which belongs to the partner, i.e. the caller must
666     /// filter out messages belonging to the partner prior to calling
667     /// this method.
668     virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message);
669 
670     /// @brief Checks if the partner failure has been detected based
671     /// on the DHCP traffic analysis.
672     ///
673     /// Should be called in a thread safe context.
674     ///
675     /// @return true if the partner failure has been detected, false
676     /// otherwise.
677     virtual bool failureDetectedInternal() const;
678 
679     /// @brief Removes information about the clients the partner server
680     /// should respond to while communication with the partner was
681     /// interrupted.
682     ///
683     /// See @c CommunicationState::analyzeMessage for details.
684     virtual void clearConnectingClients();
685 
686     /// @brief Structure holding information about the client which has
687     /// send the packet being analyzed.
688     struct ConnectingClient4 {
689         std::vector<uint8_t> hwaddr_;
690         std::vector<uint8_t> clientid_;
691         bool unacked_;
692     };
693 
694     /// @brief Multi index container holding information about the clients
695     /// attempting to get leases from the partner server.
696     typedef boost::multi_index_container<
697         ConnectingClient4,
698         boost::multi_index::indexed_by<
699             // First index is a composite index which allows to find a client
700             // by the HW address/client identifier tuple.
701             boost::multi_index::hashed_unique<
702                 boost::multi_index::composite_key<
703                     ConnectingClient4,
704                     boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>,
705                                                &ConnectingClient4::hwaddr_>,
706                     boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>,
707                                                &ConnectingClient4::clientid_>
708                 >
709             >,
710             // Second index allows for counting all clients which are
711             // considered unacked.
712             boost::multi_index::ordered_non_unique<
713                 boost::multi_index::member<ConnectingClient4, bool, &ConnectingClient4::unacked_>
714             >
715         >
716     > ConnectingClients4;
717 
    /// @brief Holds information about the clients attempting to contact
    /// the partner server while the servers are in the communications
    /// interrupted state.
    ConnectingClients4 connecting_clients_;
722 };
723 
724 /// @brief Pointer to the @c CommunicationState4 object.
725 typedef boost::shared_ptr<CommunicationState4> CommunicationState4Ptr;
726 
727 /// @brief Holds communication state between DHCPv6 servers.
728 ///
729 /// This class implements DHCPv6 failure detection by monitoring the
730 /// value of the "Elapsed Time" option in received DHCPv6 messages as described
731 /// in @c CommunicationState::analyzeMessage.
732 class CommunicationState6 : public CommunicationState {
733 public:
734 
735     /// @brief Constructor.
736     ///
737     /// @param io_service pointer to the common IO service instance.
738     /// @param config pointer to the HA configuration.
739     CommunicationState6(const asiolink::IOServicePtr& io_service,
740                         const HAConfigPtr& config);
741 
742     /// @brief Checks if the DHCPv6 message appears to be unanswered.
743     ///
744     /// See @c CommunicationState::analyzeMessage for details.
745     ///
746     /// @param message DHCPv6 message to be analyzed. This must be the
747     /// message which belongs to the partner, i.e. the caller must
748     /// filter out messages belonging to the partner prior to calling
749     /// this method.
750     virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message);
751 
752     /// @brief Checks if the partner failure has been detected based
753     /// on the DHCP traffic analysis.
754     ///
755     /// @return true if the partner failure has been detected, false
756     /// otherwise.
757     virtual bool failureDetected() const;
758 
759     /// @brief Returns the current number of clients which attempted
760     /// to get a lease from the partner server.
761     ///
762     /// The returned number is reset to 0 when the server successfully
763     /// establishes communication with the partner. The number is
764     /// incremented only in the communications interrupted case.
765     ///
766     /// @return The number of clients including unacked clients.
767     virtual size_t getConnectingClientsCount() const;
768 
769     /// @brief Returns the current number of clients which haven't gotten
770     /// a lease from the partner server.
771     ///
772     /// The returned number is reset to 0 when the server successfully
773     /// establishes communication with the partner. The number is
774     /// incremented only in the communications interrupted case.
775     ///
776     /// @return Number of unacked clients.
777     virtual size_t getUnackedClientsCount() const;
778 
779 protected:
780 
781     /// @brief Checks if the DHCPv6 message appears to be unanswered.
782     ///
783     /// Should be called in a thread safe context.
784     ///
785     /// See @c CommunicationState::analyzeMessage for details.
786     ///
787     /// @param message DHCPv6 message to be analyzed. This must be the
788     /// message which belongs to the partner, i.e. the caller must
789     /// filter out messages belonging to the partner prior to calling
790     /// this method.
791     virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message);
792 
793     /// @brief Checks if the partner failure has been detected based
794     /// on the DHCP traffic analysis.
795     ///
796     /// Should be called in a thread safe context.
797     ///
798     /// @return true if the partner failure has been detected, false
799     /// otherwise.
800     virtual bool failureDetectedInternal() const;
801 
802     /// @brief Removes information about the clients the partner server
803     /// should respond to while communication with the partner was
804     /// interrupted.
805     ///
806     /// See @c CommunicationState::analyzeMessage for details.
807     virtual void clearConnectingClients();
808 
809     /// @brief Structure holding information about a client which
810     /// sent a packet being analyzed.
811     struct ConnectingClient6 {
812         std::vector<uint8_t> duid_;
813         bool unacked_;
814     };
815 
816     /// @brief Multi index container holding information about the clients
817     /// attempting to get leases from the partner server.
818     typedef boost::multi_index_container<
819         ConnectingClient6,
820         boost::multi_index::indexed_by<
821             // First index is for accessing connecting clients by DUID.
822             boost::multi_index::hashed_unique<
823                 boost::multi_index::member<ConnectingClient6, std::vector<uint8_t>,
824                                            &ConnectingClient6::duid_>
825             >,
826             // Second index allows for counting all clients which are
827             // considered unacked.
828             boost::multi_index::ordered_non_unique<
829                 boost::multi_index::member<ConnectingClient6, bool, &ConnectingClient6::unacked_>
830             >
831         >
832     > ConnectingClients6;
833 
834     /// @brief Holds information about the clients attempting to contact
835     /// the partner server while the servers are in communications
836     /// interrupted state.
837     ConnectingClients6 connecting_clients_;
838 };
839 
840 /// @brief Pointer to the @c CommunicationState6 object.
841 typedef boost::shared_ptr<CommunicationState6> CommunicationState6Ptr;
842 
843 } // end of namespace isc::ha
844 } // end of namespace isc
845 
846 #endif
847