// Copyright (C) 2018-2021 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef HA_COMMUNICATION_STATE_H
#define HA_COMMUNICATION_STATE_H

#include <ha_config.h>
#include <ha_service_states.h>
#include <asiolink/interval_timer.h>
#include <asiolink/io_service.h>
#include <cc/data.h>
#include <dhcp/pkt.h>

#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/indexed_by.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <mutex>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace isc {
namespace ha {

/// @brief Holds communication state between the two HA peers.
///
/// The HA service constantly monitors the state of the connection between
/// the two peers. If the connection is lost it is an indicator that
/// the partner server may be down and failover actions should be triggered.
///
/// A heartbeat command successfully sent over the control channel is an
/// indicator that the connection is healthy. A reply to the heartbeat
/// command includes information about the recipient state, its notion of
/// time, and other information useful for determining its health and
/// current activity.
///
/// This class uses an interval timer to run heartbeat commands over the
/// control channel.
/// The implementation of the heartbeat is external to
/// this class and is provided via @c CommunicationState::startHeartbeat
/// method. This implementation is required to run the @c poke method
/// in case of receiving a successful response to the heartbeat command.
///
/// The @c poke method sets the "last poke time" to current time, thus
/// indicating that the connection is healthy. The @c getDurationInMillisecs
/// method is used to check for how long the server hasn't been able
/// to communicate with the partner. This duration is simply a time
/// elapsed since last successful poke time. If this duration becomes
/// greater than the configured threshold, the server assumes that the
/// communication with the partner is interrupted.
///
/// The derivations of this class provide DHCPv4 and DHCPv6 specific
/// mechanisms for detecting server failures based on the analysis of
/// the received DHCP messages, i.e. how long the clients have been
/// trying to communicate with the partner and message types they sent.
/// In particular, the increased number of Rebind messages may indicate
/// issues with the DHCP server.
///
/// This class is also used to monitor the clock skew between the active
/// servers. Maintaining a reasonably low clock skew is essential for the
/// HA service to function properly. This class calculates the clock
/// skew by comparing local time of the server with the time returned by
/// the partner in response to a heartbeat command. If this value exceeds
/// certain thresholds, the @c CommunicationState::clockSkewShouldWarn
/// and the @c CommunicationState::clockSkewShouldTerminate methods indicate
/// whether the HA service should continue to operate normally, should
/// start issuing a warning about high clock skew or simply enter the
/// "terminated" state refusing to further operate until the clocks
/// are synchronized.
This requires administrative intervention and the 81 /// restart of the HA service. 82 class CommunicationState { 83 public: 84 85 /// @brief Constructor. 86 /// 87 /// @param io_service pointer to the common IO service instance. 88 /// @param config pointer to the HA configuration. 89 CommunicationState(const asiolink::IOServicePtr& io_service, 90 const HAConfigPtr& config); 91 92 /// @brief Destructor. 93 /// 94 /// Stops scheduled heartbeat. 95 virtual ~CommunicationState(); 96 97 /// @brief Returns last known state of the partner. 98 /// 99 /// @return Partner's state if it is known, or a negative value otherwise. 100 int getPartnerState() const; 101 102 /// @brief Sets partner state. 103 /// 104 /// @param state new partner's state in a textual form. Supported values are 105 /// those returned in response to a ha-heartbeat command. 106 /// @throw BadValue if unsupported state value was provided. 107 void setPartnerState(const std::string& state); 108 109 private: 110 /// @brief Sets partner state. 111 /// 112 /// @param state new partner's state in a textual form. Supported values are 113 /// those returned in response to a ha-heartbeat command. 114 /// @throw BadValue if unsupported state value was provided. 115 void setPartnerStateInternal(const std::string& state); 116 117 public: 118 /// @brief Returns scopes served by the partner server. 119 /// 120 /// @return A set of scopes served by the partner. 121 std::set<std::string> getPartnerScopes() const; 122 123 /// @brief Sets partner scopes. 124 /// 125 /// @param new_scopes Partner scopes enclosed in a JSON list. 126 void setPartnerScopes(data::ConstElementPtr new_scopes); 127 128 private: 129 /// @brief Sets partner scopes. 130 /// 131 /// @param new_scopes Partner scopes enclosed in a JSON list. 132 void setPartnerScopesInternal(data::ConstElementPtr new_scopes); 133 134 public: 135 /// @brief Starts recurring heartbeat (public interface). 
136 /// 137 /// @param interval heartbeat interval in milliseconds. 138 /// @param heartbeat_impl pointer to the heartbeat implementation 139 /// function. 140 void startHeartbeat(const long interval, 141 const std::function<void()>& heartbeat_impl); 142 143 /// @brief Stops recurring heartbeat. 144 void stopHeartbeat(); 145 146 private: 147 /// @brief Starts recurring heartbeat. 148 /// 149 /// @param interval heartbeat interval in milliseconds. 150 /// @param heartbeat_impl pointer to the heartbeat implementation 151 /// function. 152 void startHeartbeatInternal(const long interval = 0, 153 const std::function<void()>& heartbeat_impl = 0); 154 155 /// @brief Stops recurring heartbeat. 156 void stopHeartbeatInternal(); 157 158 public: 159 /// @brief Checks if recurring heartbeat is running. 160 /// 161 /// @return true if heartbeat is running, false otherwise. 162 bool isHeartbeatRunning() const; 163 164 /// @brief Pokes the communication state. 165 /// 166 /// Sets the last poke time to current time. If the heartbeat timer 167 /// has been scheduled, it is reset (starts over measuring the time 168 /// to the next heartbeat). 169 void poke(); 170 171 private: 172 /// @brief Pokes the communication state. 173 /// 174 /// Sets the last poke time to current time. If the heartbeat timer 175 /// has been scheduled, it is reset (starts over measuring the time 176 /// to the next heartbeat). 177 void pokeInternal(); 178 179 public: 180 /// @brief Returns duration between the poke time and current time. 181 /// 182 /// @return Duration between the poke time and current time. 183 int64_t getDurationInMillisecs() const; 184 185 /// @brief Checks if communication with the partner is interrupted. 186 /// 187 /// This method checks if the communication with the partner appears 188 /// to be interrupted. This is the case when the time since last 189 /// successful communication is longer than the configured 190 /// max-response-delay value. 
191 /// 192 /// @return true if communication is interrupted, false otherwise. 193 bool isCommunicationInterrupted() const; 194 195 /// @brief Checks if the DHCP message appears to be unanswered. 196 /// 197 /// This method is used to provide the communication state with a 198 /// received DHCP message directed to the HA partner, to detect 199 /// if the partner fails to answer DHCP messages directed to it. 200 /// The DHCPv4 and DHCPv6 specific derivations implement this 201 /// functionality. 202 /// 203 /// This check is orthogonal to the heartbeat mechanism and is 204 /// usually triggered after several consecutive heartbeats fail 205 /// to be responded. 206 /// 207 /// The general approach to server failure detection is based on the 208 /// analysis of the "secs" field value (DHCPv4) and "elapsed time" 209 /// option value (DHCPv6). They indicate for how long the client 210 /// has been trying to complete the DHCP transaction. If these 211 /// values exceed a configured threshold, the client is considered 212 /// to fail to communicate with the server. This fact is recorded 213 /// by this object. If the number of distinct clients failing to 214 /// communicate with the partner exceeds a configured maximum 215 /// value, this server considers the partner to be offline. In this 216 /// case, this server will most likely start serving clients 217 /// which would normally be served by the partner. 218 /// 219 /// All information gathered by this method is cleared when the 220 /// @c poke method is invoked. 221 /// 222 /// @param message DHCP message to be analyzed. This must be the 223 /// message which belongs to the partner, i.e. the caller must 224 /// filter out messages belonging to the partner prior to calling 225 /// this method. 226 virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) = 0; 227 228 /// @brief Returns the number of analyzed messages while being in the 229 /// communications interrupted state. 
230 /// 231 /// @return Number of analyzed messages. It includes retransmissions by 232 /// the same clients. 233 size_t getAnalyzedMessagesCount() const; 234 235 /// @brief Checks if the partner failure has been detected based 236 /// on the DHCP traffic analysis. 237 /// 238 /// In the special case when max-unacked-clients is set to 0 this 239 /// method always returns true. Note that max-unacked-clients 240 /// set to 0 means that failure detection is not really performed. 241 /// Returning true in that case simplifies the code of the 242 /// @c HAService which doesn't need to check if the failure detection 243 /// is enabled or not. It simply calls this method in the 244 /// 'communications interrupted' situation to check if the 245 /// server should be transitioned to the 'partner-down' state. 246 /// 247 /// @return true if the partner failure has been detected, false 248 /// otherwise. 249 virtual bool failureDetected() const = 0; 250 251 /// @brief Returns the current number of clients which attempted 252 /// to get a lease from the partner server. 253 /// 254 /// The returned number is reset to 0 when the server successfully 255 /// establishes communication with the partner. The number is 256 /// incremented only in the communications interrupted case. 257 /// 258 /// @return The number of clients including unacked clients. 259 virtual size_t getConnectingClientsCount() const = 0; 260 261 /// @brief Returns the current number of clients which haven't got 262 /// the lease from the partner server. 263 /// 264 /// The returned number is reset to 0 when the server successfully 265 /// establishes communication with the partner. The number is 266 /// incremented only in the communications interrupted case. 267 /// 268 /// @return Number of unacked clients. 
269 virtual size_t getUnackedClientsCount() const = 0; 270 271 protected: 272 273 /// @brief Removes information about the clients the partner server 274 /// should respond to while communication with the partner was 275 /// interrupted. 276 /// 277 /// This information is cleared by the @c CommunicationState::poke. 278 /// The derivations of this class must provide DHCPv4 and DHCPv6 specific 279 /// implementations of this method. The @c poke method is called to 280 /// indicate that the connection has been successfully (re)established. 281 /// Therefore the clients counters are reset and the failure detection 282 /// procedure starts over. 283 /// 284 /// See @c CommunicationState::analyzeMessage for details. 285 virtual void clearConnectingClients() = 0; 286 287 public: 288 289 /// @brief Issues a warning about high clock skew between the active 290 /// servers if one is warranted. 291 /// 292 /// The HA service monitors the clock skew between the active servers. The 293 /// clock skew is calculated from the local time and the time returned by 294 /// the partner in response to a heartbeat. When clock skew exceeds a certain 295 /// threshold the HA service starts issuing a warning message. This method 296 /// returns true if the HA service should issue this message. 297 /// 298 /// Currently, the warning threshold for the clock skew is hardcoded to 299 /// 30 seconds. In the future it may become configurable. 300 /// 301 /// This method is called for each heartbeat. If we issue a warning for each 302 /// heartbeat it may flood logs with those messages. This method provides 303 /// a gating mechanism which prevents the HA service from logging the 304 /// warning more often than every 60 seconds. If the last warning was issued 305 /// less than 60 seconds ago this method will return false even if the clock 306 /// skew exceeds the 30 seconds threshold. The correction of the clock skew 307 /// will reset the gating counter. 
308 /// 309 /// @return true if the warning message should be logged because of the clock 310 /// skew exceeding a warning threshold. 311 bool clockSkewShouldWarn(); 312 313 private: 314 /// @brief Issues a warning about high clock skew between the active 315 /// servers if one is warranted. 316 /// 317 /// The HA service monitors the clock skew between the active servers. The 318 /// clock skew is calculated from the local time and the time returned by 319 /// the partner in response to a heartbeat. When clock skew exceeds a certain 320 /// threshold the HA service starts issuing a warning message. This method 321 /// returns true if the HA service should issue this message. 322 /// 323 /// Currently, the warning threshold for the clock skew is hardcoded to 324 /// 30 seconds. In the future it may become configurable. 325 /// 326 /// This method is called for each heartbeat. If we issue a warning for each 327 /// heartbeat it may flood logs with those messages. This method provides 328 /// a gating mechanism which prevents the HA service from logging the 329 /// warning more often than every 60 seconds. If the last warning was issued 330 /// less than 60 seconds ago this method will return false even if the clock 331 /// skew exceeds the 30 seconds threshold. The correction of the clock skew 332 /// will reset the gating counter. 333 /// 334 /// @return true if the warning message should be logged because of the clock 335 /// skew exceeding a warning threshold. 336 bool clockSkewShouldWarnInternal(); 337 338 public: 339 /// @brief Indicates whether the HA service should enter "terminated" 340 /// state as a result of the clock skew exceeding maximum value. 341 /// 342 /// If the clocks on the active servers are not synchronized (perhaps as 343 /// a result of a warning message caused by @c clockSkewShouldWarn) and the 344 /// clocks further drift, the clock skew may exceed another threshold which 345 /// should cause the HA service to enter "terminated" state. 
In this state 346 /// the servers still respond to DHCP clients normally, but they will neither 347 /// send lease updates nor heartbeats. In this case, the administrator must 348 /// correct the problem (synchronize the clocks) and restart the service. 349 /// This method indicates whether the service should terminate or not. 350 /// 351 /// Currently, the terminal threshold for the clock skew is hardcoded to 352 /// 60 seconds. In the future it may become configurable. 353 /// 354 /// @return true if the HA service should enter "terminated" state. 355 bool clockSkewShouldTerminate() const; 356 357 private: 358 /// @brief Indicates whether the HA service should enter "terminated" 359 /// state as a result of the clock skew exceeding maximum value. 360 /// 361 /// If the clocks on the active servers are not synchronized (perhaps as 362 /// a result of a warning message caused by @c clockSkewShouldWarn) and the 363 /// clocks further drift, the clock skew may exceed another threshold which 364 /// should cause the HA service to enter "terminated" state. In this state 365 /// the servers still respond to DHCP clients normally, but they will neither 366 /// send lease updates nor heartbeats. In this case, the administrator must 367 /// correct the problem (synchronize the clocks) and restart the service. 368 /// This method indicates whether the service should terminate or not. 369 /// 370 /// Currently, the terminal threshold for the clock skew is hardcoded to 371 /// 60 seconds. In the future it may become configurable. 372 /// 373 /// @return true if the HA service should enter "terminated" state. 374 bool clockSkewShouldTerminateInternal() const; 375 376 /// @brief Checks if the clock skew is greater than the specified number 377 /// of seconds. 378 /// 379 /// @param seconds a positive value to compare the clock skew with. 380 /// @return true if the absolute clock skew is greater than the specified 381 /// number of seconds, false otherwise. 
382 bool isClockSkewGreater(const long seconds) const; 383 384 public: 385 386 /// @brief Provide partner's notion of time so the new clock skew can be 387 /// calculated. 388 /// 389 /// @param time_text Partner's time received in response to a heartbeat. The 390 /// time must be provided in the RFC 1123 format. It stores the current 391 /// time, partner's time, and the difference (skew) between them. 392 /// 393 /// @throw isc::http::HttpTimeConversionError if the time format is invalid. 394 /// 395 /// @todo Consider some other time formats which include millisecond 396 /// precision. 397 void setPartnerTime(const std::string& time_text); 398 399 private: 400 /// @brief Provide partner's notion of time so the new clock skew can be 401 /// calculated. 402 /// 403 /// @param time_text Partner's time received in response to a heartbeat. The 404 /// time must be provided in the RFC 1123 format. It stores the current 405 /// time, partner's time, and the difference (skew) between them. 406 /// 407 /// @throw isc::http::HttpTimeConversionError if the time format is invalid. 408 /// 409 /// @todo Consider some other time formats which include millisecond 410 /// precision. 411 void setPartnerTimeInternal(const std::string& time_text); 412 413 public: 414 /// @brief Returns current clock skew value in the logger friendly format. 415 std::string logFormatClockSkew() const; 416 417 private: 418 /// @brief Returns current clock skew value in the logger friendly format. 419 std::string logFormatClockSkewInternal() const; 420 421 public: 422 /// @brief Returns the report about current communication state. 423 /// 424 /// This function returns a JSON map describing the state of communication 425 /// with a partner. This report is included in the response to the 426 /// status-get command. 427 /// 428 /// @return JSON element holding the report. 429 data::ElementPtr getReport() const; 430 431 /// @brief Modifies poke time by adding seconds to it. 
432 /// 433 /// Used in unittests only. 434 /// 435 /// @param secs number of seconds to be added to the poke time. If 436 /// the value is negative it will set the poke time in the past 437 /// comparing to current value. 438 void modifyPokeTime(const long secs); 439 440 private: 441 442 /// @brief Returns duration between the poke time and current time. 443 /// 444 /// Should be called in a thread safe context. 445 /// 446 /// @return Duration between the poke time and current time. 447 int64_t getDurationInMillisecsInternal() const; 448 449 protected: 450 /// @brief Update the poke time and compute the duration. 451 /// 452 /// @return The time elapsed. 453 boost::posix_time::time_duration updatePokeTime(); 454 455 private: 456 /// @brief Update the poke time and compute the duration. 457 /// 458 /// Should be called in a thread safe context. 459 /// 460 /// @return The time elapsed. 461 boost::posix_time::time_duration updatePokeTimeInternal(); 462 463 public: 464 465 /// @brief Returns a total number of unsent lease updates. 466 uint64_t getUnsentUpdateCount() const; 467 468 /// @brief Increases a total number of unsent lease updates by 1. 469 /// 470 /// This method should be called when the server has allocated a 471 /// lease but decided to not send the lease update to its partner. 472 /// If the server is in the partner-down state it allocates new 473 /// leases but doesn't send lease updates because the partner is 474 /// unavailable. 475 /// 476 /// This method protects against setting the value to 0 in an 477 /// unlikely event of the overflow. The zero is reserved for the 478 /// server startup case. 479 void increaseUnsentUpdateCount(); 480 481 private: 482 483 /// @brief Thread unsafe implementation of the @c increaseUnsentUpdateCount. 484 void increaseUnsentUpdateCountInternal(); 485 486 public: 487 488 /// @brief Checks if the partner allocated new leases for which it hasn't sent 489 /// any lease updates. 
490 /// 491 /// It compares a previous and current value of the @c partner_unsent_update_count_. 492 /// If the current value is 0 and the previous value is non-zero it indicates 493 /// that the partner was restarted. 494 /// 495 /// @return true if the partner has allocated new leases for which it didn't 496 /// send lease updates, false otherwise. 497 bool hasPartnerNewUnsentUpdates() const; 498 499 private: 500 501 /// @brief Thread unsafe implementation of the @c hasPartnerNewUnsentUpdates. 502 /// 503 /// @return true if the partner has allocated new leases for which it didn't 504 /// send lease updates, false otherwise. 505 bool hasPartnerNewUnsentUpdatesInternal() const; 506 507 public: 508 509 /// @brief Saves new total number of unsent lease updates from the partner. 510 /// 511 /// @param unsent_updates_count new total number of unsent lease updates from 512 /// the partner. 513 void setPartnerUnsentUpdateCount(uint64_t unsent_update_count); 514 515 private: 516 517 /// @brief Thread unsafe implementation of the @c setPartnerUnsentUpdateCount. 518 /// 519 /// @param unsent_updates_count new total number of unsent lease updates from 520 /// the partner. 521 void setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count); 522 523 protected: 524 /// @brief Pointer to the common IO service instance. 525 asiolink::IOServicePtr io_service_; 526 527 /// @brief High availability configuration. 528 HAConfigPtr config_; 529 530 /// @brief Interval timer triggering heartbeat commands. 531 asiolink::IntervalTimerPtr timer_; 532 533 /// @brief Interval specified for the heartbeat. 534 long interval_; 535 536 /// @brief Last poke time. 537 boost::posix_time::ptime poke_time_; 538 539 /// @brief Pointer to the function providing heartbeat implementation. 540 std::function<void()> heartbeat_impl_; 541 542 /// @brief Last known state of the partner server. 543 /// 544 /// Negative value means that the partner's state is unknown. 
545 int partner_state_; 546 547 /// @brief Last known set of scopes served by the partner server. 548 std::set<std::string> partner_scopes_; 549 550 /// @brief Clock skew between the active servers. 551 boost::posix_time::time_duration clock_skew_; 552 553 /// @brief Holds a time when last warning about too high clock skew 554 /// was issued. 555 boost::posix_time::ptime last_clock_skew_warn_; 556 557 /// @brief My time when skew was calculated. 558 boost::posix_time::ptime my_time_at_skew_; 559 560 /// @brief Partner reported time when skew was calculated. 561 boost::posix_time::ptime partner_time_at_skew_; 562 563 /// @brief Total number of analyzed messages to be responded by partner. 564 size_t analyzed_messages_count_; 565 566 /// @brief Total number of unsent lease updates. 567 /// 568 /// The lease updates are not sent when the server is in the partner 569 /// down state. The server counts the number of lease updates which 570 /// haven't been sent to the partner because the partner was unavailable. 571 /// The partner receives this value in a response to a heartbeat message 572 /// and can use it to determine if it should synchronize its lease 573 /// database. 574 uint64_t unsent_update_count_; 575 576 /// @brief Previous and current total number of unsent lease updates 577 /// from the partner. 578 /// 579 /// This value is returned in response to a heartbeat command and saved 580 /// using the @c setPartnerUnsentUpdateCount. The previous value is 581 /// preserved so the values can be compared in the state handlers. 582 std::pair<uint64_t, uint64_t> partner_unsent_update_count_; 583 584 /// @brief The mutex used to protect internal state. 585 const boost::scoped_ptr<std::mutex> mutex_; 586 }; 587 588 /// @brief Type of the pointer to the @c CommunicationState object. 589 typedef boost::shared_ptr<CommunicationState> CommunicationStatePtr; 590 591 592 /// @brief Holds communication state between DHCPv4 servers. 
593 /// 594 /// This class implements DHCPv4 failure detection by monitoring the 595 /// value of the "secs" field in received DHCPv4 messages as described 596 /// in @c CommunicationState::analyzeMessage. 597 class CommunicationState4 : public CommunicationState { 598 public: 599 600 /// @brief Constructor. 601 /// 602 /// @param io_service pointer to the common IO service instance. 603 /// @param config pointer to the HA configuration. 604 CommunicationState4(const asiolink::IOServicePtr& io_service, 605 const HAConfigPtr& config); 606 607 /// @brief Checks if the DHCPv4 message appears to be unanswered. 608 /// 609 /// This method uses "secs" field value for detecting client 610 /// communication failures as described in the 611 /// @c CommunicationState::analyzeMessage. Some misbehaving Windows 612 /// clients were reported to swap "secs" field bytes. In this case 613 /// the first byte is set to non-zero byte and the second byte is 614 /// set to 0. This method handles such cases and corrects bytes 615 /// order before comparing against the threshold. 616 /// 617 /// @param message DHCPv4 message to be analyzed. This must be the 618 /// message which belongs to the partner, i.e. the caller must 619 /// filter out messages belonging to the partner prior to calling 620 /// this method. 621 virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message); 622 623 /// @brief Checks if the partner failure has been detected based 624 /// on the DHCP traffic analysis. 625 /// 626 /// @return true if the partner failure has been detected, false 627 /// otherwise. 628 virtual bool failureDetected() const; 629 630 /// @brief Returns the current number of clients which attempted 631 /// to get a lease from the partner server. 632 /// 633 /// The returned number is reset to 0 when the server successfully 634 /// establishes communication with the partner. The number is 635 /// incremented only in the communications interrupted case. 
636 /// 637 /// @return The number of clients including unacked clients. 638 virtual size_t getConnectingClientsCount() const; 639 640 /// @brief Returns the current number of clients which haven't gotten 641 /// a lease from the partner server. 642 /// 643 /// The returned number is reset to 0 when the server successfully 644 /// establishes communication with the partner. The number is 645 /// incremented only in the communications interrupted case. 646 /// 647 /// @return Number of unacked clients. 648 virtual size_t getUnackedClientsCount() const; 649 650 protected: 651 652 /// @brief Checks if the DHCPv4 message appears to be unanswered. 653 /// 654 /// Should be called in a thread safe context. 655 /// 656 /// This method uses "secs" field value for detecting client 657 /// communication failures as described in the 658 /// @c CommunicationState::analyzeMessage. Some misbehaving Windows 659 /// clients were reported to swap "secs" field bytes. In this case 660 /// the first byte is set to non-zero byte and the second byte is 661 /// set to 0. This method handles such cases and corrects bytes 662 /// order before comparing against the threshold. 663 /// 664 /// @param message DHCPv4 message to be analyzed. This must be the 665 /// message which belongs to the partner, i.e. the caller must 666 /// filter out messages belonging to the partner prior to calling 667 /// this method. 668 virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message); 669 670 /// @brief Checks if the partner failure has been detected based 671 /// on the DHCP traffic analysis. 672 /// 673 /// Should be called in a thread safe context. 674 /// 675 /// @return true if the partner failure has been detected, false 676 /// otherwise. 677 virtual bool failureDetectedInternal() const; 678 679 /// @brief Removes information about the clients the partner server 680 /// should respond to while communication with the partner was 681 /// interrupted. 
682 /// 683 /// See @c CommunicationState::analyzeMessage for details. 684 virtual void clearConnectingClients(); 685 686 /// @brief Structure holding information about the client which has 687 /// send the packet being analyzed. 688 struct ConnectingClient4 { 689 std::vector<uint8_t> hwaddr_; 690 std::vector<uint8_t> clientid_; 691 bool unacked_; 692 }; 693 694 /// @brief Multi index container holding information about the clients 695 /// attempting to get leases from the partner server. 696 typedef boost::multi_index_container< 697 ConnectingClient4, 698 boost::multi_index::indexed_by< 699 // First index is a composite index which allows to find a client 700 // by the HW address/client identifier tuple. 701 boost::multi_index::hashed_unique< 702 boost::multi_index::composite_key< 703 ConnectingClient4, 704 boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>, 705 &ConnectingClient4::hwaddr_>, 706 boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>, 707 &ConnectingClient4::clientid_> 708 > 709 >, 710 // Second index allows for counting all clients which are 711 // considered unacked. 712 boost::multi_index::ordered_non_unique< 713 boost::multi_index::member<ConnectingClient4, bool, &ConnectingClient4::unacked_> 714 > 715 > 716 > ConnectingClients4; 717 718 /// @brief Holds information about the clients attempting to contact 719 /// the partner server while the servers are in communications 720 /// interrupted state. 721 ConnectingClients4 connecting_clients_; 722 }; 723 724 /// @brief Pointer to the @c CommunicationState4 object. 725 typedef boost::shared_ptr<CommunicationState4> CommunicationState4Ptr; 726 727 /// @brief Holds communication state between DHCPv6 servers. 728 /// 729 /// This class implements DHCPv6 failure detection by monitoring the 730 /// value of the "Elapsed Time" option in received DHCPv6 messages as described 731 /// in @c CommunicationState::analyzeMessage. 
732 class CommunicationState6 : public CommunicationState { 733 public: 734 735 /// @brief Constructor. 736 /// 737 /// @param io_service pointer to the common IO service instance. 738 /// @param config pointer to the HA configuration. 739 CommunicationState6(const asiolink::IOServicePtr& io_service, 740 const HAConfigPtr& config); 741 742 /// @brief Checks if the DHCPv6 message appears to be unanswered. 743 /// 744 /// See @c CommunicationState::analyzeMessage for details. 745 /// 746 /// @param message DHCPv6 message to be analyzed. This must be the 747 /// message which belongs to the partner, i.e. the caller must 748 /// filter out messages belonging to the partner prior to calling 749 /// this method. 750 virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message); 751 752 /// @brief Checks if the partner failure has been detected based 753 /// on the DHCP traffic analysis. 754 /// 755 /// @return true if the partner failure has been detected, false 756 /// otherwise. 757 virtual bool failureDetected() const; 758 759 /// @brief Returns the current number of clients which attempted 760 /// to get a lease from the partner server. 761 /// 762 /// The returned number is reset to 0 when the server successfully 763 /// establishes communication with the partner. The number is 764 /// incremented only in the communications interrupted case. 765 /// 766 /// @return The number of clients including unacked clients. 767 virtual size_t getConnectingClientsCount() const; 768 769 /// @brief Returns the current number of clients which haven't gotten 770 /// a lease from the partner server. 771 /// 772 /// The returned number is reset to 0 when the server successfully 773 /// establishes communication with the partner. The number is 774 /// incremented only in the communications interrupted case. 775 /// 776 /// @return Number of unacked clients. 
777 virtual size_t getUnackedClientsCount() const; 778 779 protected: 780 781 /// @brief Checks if the DHCPv6 message appears to be unanswered. 782 /// 783 /// Should be called in a thread safe context. 784 /// 785 /// See @c CommunicationState::analyzeMessage for details. 786 /// 787 /// @param message DHCPv6 message to be analyzed. This must be the 788 /// message which belongs to the partner, i.e. the caller must 789 /// filter out messages belonging to the partner prior to calling 790 /// this method. 791 virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message); 792 793 /// @brief Checks if the partner failure has been detected based 794 /// on the DHCP traffic analysis. 795 /// 796 /// Should be called in a thread safe context. 797 /// 798 /// @return true if the partner failure has been detected, false 799 /// otherwise. 800 virtual bool failureDetectedInternal() const; 801 802 /// @brief Removes information about the clients the partner server 803 /// should respond to while communication with the partner was 804 /// interrupted. 805 /// 806 /// See @c CommunicationState::analyzeMessage for details. 807 virtual void clearConnectingClients(); 808 809 /// @brief Structure holding information about a client which 810 /// sent a packet being analyzed. 811 struct ConnectingClient6 { 812 std::vector<uint8_t> duid_; 813 bool unacked_; 814 }; 815 816 /// @brief Multi index container holding information about the clients 817 /// attempting to get leases from the partner server. 818 typedef boost::multi_index_container< 819 ConnectingClient6, 820 boost::multi_index::indexed_by< 821 // First index is for accessing connecting clients by DUID. 822 boost::multi_index::hashed_unique< 823 boost::multi_index::member<ConnectingClient6, std::vector<uint8_t>, 824 &ConnectingClient6::duid_> 825 >, 826 // Second index allows for counting all clients which are 827 // considered unacked. 
828 boost::multi_index::ordered_non_unique< 829 boost::multi_index::member<ConnectingClient6, bool, &ConnectingClient6::unacked_> 830 > 831 > 832 > ConnectingClients6; 833 834 /// @brief Holds information about the clients attempting to contact 835 /// the partner server while the servers are in communications 836 /// interrupted state. 837 ConnectingClients6 connecting_clients_; 838 }; 839 840 /// @brief Pointer to the @c CommunicationState6 object. 841 typedef boost::shared_ptr<CommunicationState6> CommunicationState6Ptr; 842 843 } // end of namespace isc::ha 844 } // end of namespace isc 845 846 #endif 847