package structs

import (
	"bytes"
	"container/heap"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/base32"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"hash/crc32"
	"math"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/hashicorp/nomad/lib/cpuset"

	"github.com/hashicorp/cronexpr"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/mitchellh/copystructure"
	"golang.org/x/crypto/blake2b"

	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/command/agent/host"
	"github.com/hashicorp/nomad/command/agent/pprof"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/hashicorp/nomad/helper/constraints/semver"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/lib/kheap"
	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)

var (
	// validPolicyName is used to validate a policy name
	validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")

	// b32 is a lowercase base32 encoding for use in URL friendly service hashes
	b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567"))
)

// MessageType identifies the kind of Raft log entry so the FSM can dispatch
// it to the matching apply handler. Values are persisted in the Raft log and
// must therefore never be renumbered.
type MessageType uint8

// note: new raft message types need to be added to the end of this
// list of contents
const (
	NodeRegisterRequestType                      MessageType = 0
	NodeDeregisterRequestType                    MessageType = 1
	NodeUpdateStatusRequestType                  MessageType = 2
	NodeUpdateDrainRequestType                   MessageType = 3
	JobRegisterRequestType                       MessageType = 4
	JobDeregisterRequestType                     MessageType = 5
	EvalUpdateRequestType                        MessageType = 6
	EvalDeleteRequestType                        MessageType = 7
	AllocUpdateRequestType                       MessageType = 8
	AllocClientUpdateRequestType                 MessageType = 9
	ReconcileJobSummariesRequestType             MessageType = 10
	VaultAccessorRegisterRequestType             MessageType = 11
	VaultAccessorDeregisterRequestType           MessageType = 12
	ApplyPlanResultsRequestType                  MessageType = 13
	DeploymentStatusUpdateRequestType            MessageType = 14
	DeploymentPromoteRequestType                 MessageType = 15
	DeploymentAllocHealthRequestType             MessageType = 16
	DeploymentDeleteRequestType                  MessageType = 17
	JobStabilityRequestType                      MessageType = 18
	ACLPolicyUpsertRequestType                   MessageType = 19
	ACLPolicyDeleteRequestType                   MessageType = 20
	ACLTokenUpsertRequestType                    MessageType = 21
	ACLTokenDeleteRequestType                    MessageType = 22
	ACLTokenBootstrapRequestType                 MessageType = 23
	AutopilotRequestType                         MessageType = 24
	UpsertNodeEventsType                         MessageType = 25
	JobBatchDeregisterRequestType                MessageType = 26
	AllocUpdateDesiredTransitionRequestType      MessageType = 27
	NodeUpdateEligibilityRequestType             MessageType = 28
	BatchNodeUpdateDrainRequestType              MessageType = 29
	SchedulerConfigRequestType                   MessageType = 30
	NodeBatchDeregisterRequestType               MessageType = 31
	ClusterMetadataRequestType                   MessageType = 32
	ServiceIdentityAccessorRegisterRequestType   MessageType = 33
	ServiceIdentityAccessorDeregisterRequestType MessageType = 34
	CSIVolumeRegisterRequestType                 MessageType = 35
	CSIVolumeDeregisterRequestType               MessageType = 36
	CSIVolumeClaimRequestType                    MessageType = 37
	ScalingEventRegisterRequestType              MessageType = 38
	CSIVolumeClaimBatchRequestType               MessageType = 39
	CSIPluginDeleteRequestType                   MessageType = 40
	EventSinkUpsertRequestType                   MessageType = 41
	EventSinkDeleteRequestType                   MessageType = 42
	BatchEventSinkUpdateProgressType             MessageType = 43
	OneTimeTokenUpsertRequestType                MessageType = 44
	OneTimeTokenDeleteRequestType                MessageType = 45
	OneTimeTokenExpireRequestType                MessageType = 46

	// Namespace types were moved from enterprise and therefore start at 64
	NamespaceUpsertRequestType MessageType = 64
	NamespaceDeleteRequestType MessageType = 65
)
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// MsgTypeTestSetup is used during testing when calling state store
	// methods directly that require an FSM MessageType
	MsgTypeTestSetup MessageType = IgnoreUnknownTypeFlag

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// Keys used in the version map returned by the Status.Version request.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	// Artifact getter modes controlling how a downloaded artifact is treated.
	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"

	// maxPolicyDescriptionLength limits a policy description length
	maxPolicyDescriptionLength = 256

	// maxTokenNameLength limits an ACL token name length
	maxTokenNameLength = 256

	// ACLClientToken and ACLManagementToken are the only types of tokens
	ACLClientToken     = "client"
	ACLManagementToken = "management"

	// DefaultNamespace is the default namespace.
	DefaultNamespace            = "default"
	DefaultNamespaceDescription = "Default shared namespace"

	// AllNamespacesSentinel is the value used as a namespace RPC value
	// to indicate that endpoints must search in all namespaces
	AllNamespacesSentinel = "*"

	// maxNamespaceDescriptionLength limits a namespace description length
	maxNamespaceDescriptionLength = 256

	// JitterFraction is the limit to the amount of jitter we apply
	// to a user specified MaxQueryTime. We divide the specified time by
	// the fraction. So 16 == 6.25% limit of jitter. This jitter is also
	// applied to RPCHoldTimeout.
	JitterFraction = 16

	// MaxRetainedNodeEvents is the maximum number of node events that will be
	// retained for a single node
	MaxRetainedNodeEvents = 10

	// MaxRetainedNodeScores is the number of top scoring nodes for which we
	// retain scoring metadata
	MaxRetainedNodeScores = 5

	// NormScorerName is the name of the normalized scorer
	NormScorerName = "normalized-score"

	// MaxBlockingRPCQueryTime is used to bound the limit of a blocking query
	MaxBlockingRPCQueryTime = 300 * time.Second

	// DefaultBlockingRPCQueryTime is the amount of time we block waiting for a change
	// if no time is specified. Previously we would wait the MaxBlockingRPCQueryTime.
	DefaultBlockingRPCQueryTime = 300 * time.Second
)

var (
	// validNamespaceName is used to validate a namespace name
	validNamespaceName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")
)

// NamespacedID is a tuple of an ID and a namespace
type NamespacedID struct {
	ID        string
	Namespace string
}

// NewNamespacedID returns a new namespaced ID given the ID and namespace
func NewNamespacedID(id, ns string) NamespacedID {
	return NamespacedID{
		ID:        id,
		Namespace: ns,
	}
}

// String returns a human readable representation suitable for logging.
func (n NamespacedID) String() string {
	return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID)
}

// RPCInfo is used to describe common information about query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
	IsForwarded() bool
	SetForwarded()
	TimeToBlock() time.Duration
	// SetTimeToBlock sets how long this request can block. The requested time may not be possible,
	// so Callers should readback TimeToBlock. E.g. you cannot set time to block at all on WriteRequests
	// and it cannot exceed MaxBlockingRPCQueryTime
	SetTimeToBlock(t time.Duration)
}

// InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct
// should NOT be replicated in the API package as it is internal only.
type InternalRpcInfo struct {
	// Forwarded marks whether the RPC has been forwarded.
	Forwarded bool
}

// IsForwarded returns whether the RPC is forwarded from another server.
func (i *InternalRpcInfo) IsForwarded() bool {
	return i.Forwarded
}

// SetForwarded marks that the RPC is being forwarded from another server.
func (i *InternalRpcInfo) SetForwarded() {
	i.Forwarded = true
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// Namespace is the target namespace for the query.
251 // 252 // Since handlers do not have a default value set they should access 253 // the Namespace via the RequestNamespace method. 254 // 255 // Requests accessing specific namespaced objects must check ACLs 256 // against the namespace of the object, not the namespace in the 257 // request. 258 Namespace string 259 260 // If set, wait until query exceeds given index. Must be provided 261 // with MaxQueryTime. 262 MinQueryIndex uint64 263 264 // Provided with MinQueryIndex to wait for change. 265 MaxQueryTime time.Duration 266 267 // If set, any follower can service the request. Results 268 // may be arbitrarily stale. 269 AllowStale bool 270 271 // If set, used as prefix for resource list searches 272 Prefix string 273 274 // AuthToken is secret portion of the ACL token used for the request 275 AuthToken string 276 277 // PerPage is the number of entries to be returned in queries that support 278 // paginated lists. 279 PerPage int32 280 281 // NextToken is the token used indicate where to start paging for queries 282 // that support paginated lists. 283 NextToken string 284 285 InternalRpcInfo 286} 287 288// TimeToBlock returns MaxQueryTime adjusted for maximums and defaults 289// it will return 0 if this is not a blocking query 290func (q QueryOptions) TimeToBlock() time.Duration { 291 if q.MinQueryIndex == 0 { 292 return 0 293 } 294 if q.MaxQueryTime > MaxBlockingRPCQueryTime { 295 return MaxBlockingRPCQueryTime 296 } else if q.MaxQueryTime <= 0 { 297 return DefaultBlockingRPCQueryTime 298 } 299 return q.MaxQueryTime 300} 301 302func (q QueryOptions) SetTimeToBlock(t time.Duration) { 303 q.MaxQueryTime = t 304} 305 306func (q QueryOptions) RequestRegion() string { 307 return q.Region 308} 309 310// RequestNamespace returns the request's namespace or the default namespace if 311// no explicit namespace was sent. 
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (q QueryOptions) RequestNamespace() string {
	if q.Namespace == "" {
		return DefaultNamespace
	}
	return q.Namespace
}

// IsRead is always true: QueryOptions only applies to reads.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead reports whether the query may be served by a follower
// with potentially stale results.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

// AgentPprofRequest is used to request a pprof report for a given node.
type AgentPprofRequest struct {
	// ReqType specifies the profile to use
	ReqType pprof.ReqType

	// Profile specifies the runtime/pprof profile to lookup and generate.
	Profile string

	// Seconds is the number of seconds to capture a profile
	Seconds int

	// Debug specifies if pprof profile should include debug output
	Debug int

	// GC specifies if the profile should call runtime.GC() before
	// running its profile. This is only used for "heap" profiles
	GC int

	// NodeID is the node we want to track the logs of
	NodeID string

	// ServerID is the server we want to track the logs of
	ServerID string

	QueryOptions
}

// AgentPprofResponse is used to return a generated pprof profile
type AgentPprofResponse struct {
	// ID of the agent that fulfilled the request
	AgentID string

	// Payload is the generated pprof profile
	Payload []byte

	// HTTPHeaders are a set of key value pairs to be applied as
	// HTTP headers for a specific runtime profile
	HTTPHeaders map[string]string
}

// WriteRequest is embedded by RPC requests that mutate state and carries
// the common routing and authentication fields for writes.
type WriteRequest struct {
	// The target region for this write
	Region string

	// Namespace is the target namespace for the write.
	//
	// Since RPC handlers do not have a default value set they should
	// access the Namespace via the RequestNamespace method.
	//
	// Requests accessing specific namespaced objects must check ACLs
	// against the namespace of the object, not the namespace in the
	// request.
	Namespace string

	// AuthToken is secret portion of the ACL token used for the request
	AuthToken string

	InternalRpcInfo
}

// TimeToBlock returns 0: write requests are never blocking queries.
func (w WriteRequest) TimeToBlock() time.Duration {
	return 0
}

// SetTimeToBlock is deliberately a no-op; writes cannot block, so the
// requested duration is discarded (callers should read back TimeToBlock,
// per the RPCInfo contract).
func (w WriteRequest) SetTimeToBlock(_ time.Duration) {
}

// RequestRegion returns the target region of the write.
func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// RequestNamespace returns the request's namespace or the default namespace if
// no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (w WriteRequest) RequestNamespace() string {
	if w.Namespace == "" {
		return DefaultNamespace
	}
	return w.Namespace
}

// IsRead is always false: WriteRequest only applies to writes.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead is always false: writes must be served by the leader.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node      *Node
	NodeEvent *NodeEvent
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeBatchDeregisterRequest is used for Node.BatchDeregister endpoint
// to deregister a batch of nodes from being schedulable entities.
type NodeBatchDeregisterRequest struct {
	NodeIDs []string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID    string
	Status    string
	NodeEvent *NodeEvent
	UpdatedAt int64
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain strategy
type NodeUpdateDrainRequest struct {
	NodeID        string
	DrainStrategy *DrainStrategy

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	// Meta is user-provided metadata relating to the drain operation
	Meta map[string]string

	WriteRequest
}

// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a
// batch of nodes
type BatchNodeUpdateDrainRequest struct {
	// Updates is a mapping of nodes to their updated drain strategy
	Updates map[string]*DrainUpdate

	// NodeEvents is a mapping of the node to the event to add to the node
	NodeEvents map[string]*NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// DrainUpdate is used to update the drain of a node
type DrainUpdate struct {
	// DrainStrategy is the new strategy for the node
	DrainStrategy *DrainStrategy

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool
}

// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility
type NodeUpdateEligibilityRequest struct {
	NodeID      string
	Eligibility string

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	// PreserveCounts indicates that during job update, existing task group
	// counts should be preserved, over those specified in the new job spec
	// PreserveCounts is ignored for newly created jobs.
	PreserveCounts bool

	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool

	// Eval is the evaluation that is associated with the job registration
	Eval *Evaluation

	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool

	// Global controls whether all regions of a multi-region job are
	// deregistered. It is ignored for single-region jobs.
	Global bool

	// Eval is the evaluation to create that's associated with job deregister
	Eval *Evaluation

	WriteRequest
}

// JobBatchDeregisterRequest is used to batch deregister jobs and upsert
// evaluations.
type JobBatchDeregisterRequest struct {
	// Jobs is the set of jobs to deregister
	Jobs map[NamespacedID]*JobDeregisterOptions

	// Evals is the set of evaluations to create.
	Evals []*Evaluation

	WriteRequest
}

// JobDeregisterOptions configures how a job is deregistered.
626type JobDeregisterOptions struct { 627 // Purge controls whether the deregister purges the job from the system or 628 // whether the job is just marked as stopped and will be removed by the 629 // garbage collector 630 Purge bool 631} 632 633// JobEvaluateRequest is used when we just need to re-evaluate a target job 634type JobEvaluateRequest struct { 635 JobID string 636 EvalOptions EvalOptions 637 WriteRequest 638} 639 640// EvalOptions is used to encapsulate options when forcing a job evaluation 641type EvalOptions struct { 642 ForceReschedule bool 643} 644 645// JobSpecificRequest is used when we just need to specify a target job 646type JobSpecificRequest struct { 647 JobID string 648 All bool 649 QueryOptions 650} 651 652// JobListRequest is used to parameterize a list request 653type JobListRequest struct { 654 QueryOptions 655} 656 657// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 658// evaluation of the Job. 659type JobPlanRequest struct { 660 Job *Job 661 Diff bool // Toggles an annotated diff 662 // PolicyOverride is set when the user is attempting to override any policies 663 PolicyOverride bool 664 WriteRequest 665} 666 667// JobScaleRequest is used for the Job.Scale endpoint to scale one of the 668// scaling targets in a job 669type JobScaleRequest struct { 670 JobID string 671 Target map[string]string 672 Count *int64 673 Message string 674 Error bool 675 Meta map[string]interface{} 676 // PolicyOverride is set when the user is attempting to override any policies 677 PolicyOverride bool 678 WriteRequest 679} 680 681// Validate is used to validate the arguments in the request 682func (r *JobScaleRequest) Validate() error { 683 namespace := r.Target[ScalingTargetNamespace] 684 if namespace != "" && namespace != r.RequestNamespace() { 685 return NewErrRPCCoded(400, "namespace in payload did not match header") 686 } 687 688 jobID := r.Target[ScalingTargetJob] 689 if jobID != "" && jobID != r.JobID { 690 return fmt.Errorf("job 
ID in payload did not match URL") 691 } 692 693 groupName := r.Target[ScalingTargetGroup] 694 if groupName == "" { 695 return NewErrRPCCoded(400, "missing task group name for scaling action") 696 } 697 698 if r.Count != nil { 699 if *r.Count < 0 { 700 return NewErrRPCCoded(400, "scaling action count can't be negative") 701 } 702 703 if r.Error { 704 return NewErrRPCCoded(400, "scaling action should not contain count if error is true") 705 } 706 707 truncCount := int(*r.Count) 708 if int64(truncCount) != *r.Count { 709 return NewErrRPCCoded(400, 710 fmt.Sprintf("new scaling count is too large for TaskGroup.Count (int): %v", r.Count)) 711 } 712 } 713 714 return nil 715} 716 717// JobSummaryRequest is used when we just need to get a specific job summary 718type JobSummaryRequest struct { 719 JobID string 720 QueryOptions 721} 722 723// JobScaleStatusRequest is used to get the scale status for a job 724type JobScaleStatusRequest struct { 725 JobID string 726 QueryOptions 727} 728 729// JobDispatchRequest is used to dispatch a job based on a parameterized job 730type JobDispatchRequest struct { 731 JobID string 732 Payload []byte 733 Meta map[string]string 734 WriteRequest 735} 736 737// JobValidateRequest is used to validate a job 738type JobValidateRequest struct { 739 Job *Job 740 WriteRequest 741} 742 743// JobRevertRequest is used to revert a job to a prior version. 744type JobRevertRequest struct { 745 // JobID is the ID of the job being reverted 746 JobID string 747 748 // JobVersion the version to revert to. 749 JobVersion uint64 750 751 // EnforcePriorVersion if set will enforce that the job is at the given 752 // version before reverting. 753 EnforcePriorVersion *uint64 754 755 // ConsulToken is the Consul token that proves the submitter of the job revert 756 // has access to the Service Identity policies associated with the job's 757 // Consul Connect enabled services. 
	// This field is only used to transfer the
	// token and is not stored after the Job revert.
	ConsulToken string

	// VaultToken is the Vault token that proves the submitter of the job revert
	// has access to any Vault policies specified in the targeted job version. This
	// field is only used to transfer the token and is not stored after the Job
	// revert.
	VaultToken string

	WriteRequest
}

// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
	// Job to set the stability on
	JobID      string
	JobVersion uint64

	// Set the stability
	Stable bool
	WriteRequest
}

// JobStabilityResponse is the response when marking a job as stable.
type JobStabilityResponse struct {
	WriteMeta
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions

	// Fields selects which node stub fields are included in the response.
	Fields *NodeStubFields
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// EvalID is the eval ID of the plan being applied. The modify index of the
	// evaluation is updated as part of applying the plan to ensure that subsequent
	// scheduling events for the same job will wait for the index that last produced
	// state changes.
	// This is necessary for blocked evaluations since they can be
	// processed many times, potentially making state updates, without the state of
	// the evaluation itself being updated.
	EvalID string

	// COMPAT 0.11
	// NodePreemptions is a slice of allocations from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	// Deprecated: Replaced with AllocsPreempted which contains only the diff
	NodePreemptions []*Allocation

	// AllocsPreempted is a slice of allocation diffs from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	AllocsPreempted []*AllocationDiff

	// PreemptionEvals is a slice of follow up evals for jobs whose allocations
	// have been preempted to place allocs in this plan
	PreemptionEvals []*Evaluation
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// COMPAT 0.11
	// Alloc is the list of new allocations to assign
	// Deprecated: Replaced with two separate slices, one containing stopped allocations
	// and another containing updated allocations
	Alloc []*Allocation

	// Allocations to stop. Contains only the diff, not the entire allocation
	AllocsStopped []*AllocationDiff

	// New or updated allocations
	AllocsUpdated []*Allocation

	// Evals is the list of new evaluations to create
	// Evals are valid only when used in the Raft RPC
	Evals []*Evaluation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocUpdateDesiredTransitionRequest is used to submit changes to allocations
// desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
	// Allocs is the mapping of allocation ids to their desired state
	// transition
	Allocs map[string]*DesiredTransition

	// Evals is the set of evaluations to create
	Evals []*Evaluation

	WriteRequest
}

// AllocStopRequest is used to stop and reschedule a running Allocation.
type AllocStopRequest struct {
	AllocID string

	WriteRequest
}

// AllocStopResponse is the response to an `AllocStopRequest`
type AllocStopResponse struct {
	// EvalID is the id of the follow up evaluation for the rescheduled alloc.
	EvalID string

	WriteMeta
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions

	// Fields selects which allocation stub fields are included in the response.
	Fields *AllocStubFields
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocSignalRequest is used to signal a specific allocation
type AllocSignalRequest struct {
	AllocID string
	Task    string
	Signal  string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// AllocRestartRequest is used to restart a specific allocations tasks.
type AllocRestartRequest struct {
	AllocID  string
	TaskName string

	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// ClusterMetadata is used to store per-cluster metadata.
type ClusterMetadata struct {
	ClusterID  string
	CreateTime int64
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retryable
	Error *RecoverableError

	QueryMeta
}

// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// DeploymentListRequest is used to list the deployments
type DeploymentListRequest struct {
	QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}

// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	DeploymentID string

	// Marks these allocations as healthy, allow further allocations
	// to be rolled.
	HealthyAllocationIDs []string

	// Any unhealthy allocations fail the deployment
	UnhealthyAllocationIDs []string

	WriteRequest
}

// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	DeploymentAllocHealthRequest

	// Timestamp is the timestamp to use when setting the allocations health.
	Timestamp time.Time

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	WriteRequest
}

// DeploymentRunRequest is used to remotely start a pending deployment.
// Used only for multiregion deployments.
type DeploymentRunRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentUnblockRequest is used to remotely unblock a deployment.
// Used only for multiregion deployments.
type DeploymentUnblockRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentCancelRequest is used to remotely cancel a deployment.
// Used only for multiregion deployments.
type DeploymentCancelRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}

// ScalingPolicySpecificRequest is used when we just need to specify a target scaling policy
type ScalingPolicySpecificRequest struct {
	ID string
	QueryOptions
}

// SingleScalingPolicyResponse is used to return a single scaling policy
type SingleScalingPolicyResponse struct {
	Policy *ScalingPolicy
	QueryMeta
}

// ScalingPolicyListRequest is used to parameterize a scaling policy list request
type ScalingPolicyListRequest struct {
	Job  string
	Type string
	QueryOptions
}

// ScalingPolicyListResponse is used for a list request
type ScalingPolicyListResponse struct {
	Policies []*ScalingPolicyListStub
	QueryMeta
}

// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	VolumeEvalID    string
	VolumeEvalIndex uint64
	QueryMeta
}

// JobBatchDeregisterResponse is used to respond to a batch job deregistration
type JobBatchDeregisterResponse struct {
	// JobEvals maps the job to its created evaluation
	JobEvals map[NamespacedID]string
	QueryMeta
}

// JobValidateResponse is the response from validate request
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// Features informs clients what enterprise features are allowed
	Features uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64

	// MigrateTokens are used when ACLs are enabled to allow cross node,
	// authenticated access to sticky volumes
	MigrateTokens map[string]string

	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobScaleStatusResponse is used to return the scale status for a job
type JobScaleStatusResponse struct {
	JobScaleStatus *JobScaleStatus
	QueryMeta
}

// JobScaleStatus holds the aggregate scale status of a job and its task
// groups.
type JobScaleStatus struct {
	JobID          string
	Namespace      string
	JobCreateIndex uint64
	JobModifyIndex uint64
	JobStopped     bool
	TaskGroups     map[string]*TaskGroupScaleStatus
}

// TaskGroupScaleStatus is used to return the scale status for a given task group
type TaskGroupScaleStatus struct {
	Desired   int
	Placed    int
	Running   int
	Healthy   int
	Unhealthy int
	Events    []*ScalingEvent
}

// JobDispatchResponse is used to respond to a parameterized job dispatch.
type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobVersionsRequest is used to get a jobs versions
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}

// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time duration till the job would be launched if
	// submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string

	// WaitIndex is the Raft index the worker should wait until invoking the
	// scheduler.
	WaitIndex uint64

	QueryMeta
}

// GetWaitIndex is used to retrieve the Raft index in which state should be at
// or beyond before invoking the scheduler.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
	// Prefer the wait index sent. This will be populated on all responses from
	// 0.7.0 and above
	if e.WaitIndex != 0 {
		return e.WaitIndex
	} else if e.Eval != nil {
		return e.Eval.ModifyIndex
	}

	// This should never happen
	return 1
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset, the
	// job wasn't reverted
	RevertedJobVersion *uint64

	WriteMeta
}

// NodeConnQueryResponse is used to respond to a query of whether a server has
// a connection to a specific Node
type NodeConnQueryResponse struct {
	// Connected indicates whether a connection to the Client exists
	Connected bool

	// Established marks the time at which the connection was established
	Established time.Time

	QueryMeta
}

// HostDataRequest is used by /agent/host to retrieve data about the agent's host system. If
// ServerID or NodeID is specified, the request is forwarded to the remote agent
type HostDataRequest struct {
	ServerID string
	NodeID   string
	QueryOptions
}

// HostDataResponse contains the HostData content
type HostDataResponse struct {
	AgentID  string
	HostData *host.HostData
}

// EmitNodeEventsRequest is a request to update the node events source
// with a new client-side event
type EmitNodeEventsRequest struct {
	// NodeEvents are a map where the key is a node id, and value is a list of
	// events for that node
	NodeEvents map[string][]*NodeEvent

	WriteRequest
}

// EmitNodeEventsResponse is a response to the client about the status of
// the node event source update.
1585type EmitNodeEventsResponse struct { 1586 WriteMeta 1587} 1588 1589const ( 1590 NodeEventSubsystemDrain = "Drain" 1591 NodeEventSubsystemDriver = "Driver" 1592 NodeEventSubsystemHeartbeat = "Heartbeat" 1593 NodeEventSubsystemCluster = "Cluster" 1594 NodeEventSubsystemStorage = "Storage" 1595) 1596 1597// NodeEvent is a single unit representing a node’s state change 1598type NodeEvent struct { 1599 Message string 1600 Subsystem string 1601 Details map[string]string 1602 Timestamp time.Time 1603 CreateIndex uint64 1604} 1605 1606func (ne *NodeEvent) String() string { 1607 var details []string 1608 for k, v := range ne.Details { 1609 details = append(details, fmt.Sprintf("%s: %s", k, v)) 1610 } 1611 1612 return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String()) 1613} 1614 1615func (ne *NodeEvent) Copy() *NodeEvent { 1616 c := new(NodeEvent) 1617 *c = *ne 1618 c.Details = helper.CopyMapStringString(ne.Details) 1619 return c 1620} 1621 1622// NewNodeEvent generates a new node event storing the current time as the 1623// timestamp 1624func NewNodeEvent() *NodeEvent { 1625 return &NodeEvent{Timestamp: time.Now()} 1626} 1627 1628// SetMessage is used to set the message on the node event 1629func (ne *NodeEvent) SetMessage(msg string) *NodeEvent { 1630 ne.Message = msg 1631 return ne 1632} 1633 1634// SetSubsystem is used to set the subsystem on the node event 1635func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent { 1636 ne.Subsystem = sys 1637 return ne 1638} 1639 1640// SetTimestamp is used to set the timestamp on the node event 1641func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent { 1642 ne.Timestamp = ts 1643 return ne 1644} 1645 1646// AddDetail is used to add a detail to the node event 1647func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent { 1648 if ne.Details == nil { 1649 ne.Details = make(map[string]string, 1) 1650 } 1651 ne.Details[k] = v 1652 
return ne 1653} 1654 1655const ( 1656 NodeStatusInit = "initializing" 1657 NodeStatusReady = "ready" 1658 NodeStatusDown = "down" 1659) 1660 1661// ShouldDrainNode checks if a given node status should trigger an 1662// evaluation. Some states don't require any further action. 1663func ShouldDrainNode(status string) bool { 1664 switch status { 1665 case NodeStatusInit, NodeStatusReady: 1666 return false 1667 case NodeStatusDown: 1668 return true 1669 default: 1670 panic(fmt.Sprintf("unhandled node status %s", status)) 1671 } 1672} 1673 1674// ValidNodeStatus is used to check if a node status is valid 1675func ValidNodeStatus(status string) bool { 1676 switch status { 1677 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 1678 return true 1679 default: 1680 return false 1681 } 1682} 1683 1684const ( 1685 // NodeSchedulingEligible and Ineligible marks the node as eligible or not, 1686 // respectively, for receiving allocations. This is orthoginal to the node 1687 // status being ready. 1688 NodeSchedulingEligible = "eligible" 1689 NodeSchedulingIneligible = "ineligible" 1690) 1691 1692// DrainSpec describes a Node's desired drain behavior. 1693type DrainSpec struct { 1694 // Deadline is the duration after StartTime when the remaining 1695 // allocations on a draining Node should be told to stop. 1696 Deadline time.Duration 1697 1698 // IgnoreSystemJobs allows systems jobs to remain on the node even though it 1699 // has been marked for draining. 1700 IgnoreSystemJobs bool 1701} 1702 1703// DrainStrategy describes a Node's drain behavior. 
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains will
	// be forced
	ForceDeadline time.Time

	// StartedAt is the time the drain process started
	StartedAt time.Time
}

// Copy returns a shallow copy of the drain strategy, or nil if the receiver
// is nil.
func (d *DrainStrategy) Copy() *DrainStrategy {
	if d == nil {
		return nil
	}

	nd := new(DrainStrategy)
	*nd = *d
	return nd
}

// DeadlineTime returns a boolean whether the drain strategy allows an infinite
// duration or otherwise the deadline time. The force drain is captured by the
// deadline time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
	// Treat the nil case as a force drain so during an upgrade where a node may
	// not have a drain strategy but has Drain set to true, it is treated as a
	// force to mimic old behavior.
	if d == nil {
		return false, time.Time{}
	}

	ns := d.Deadline.Nanoseconds()
	switch {
	case ns < 0: // Force
		return false, time.Time{}
	case ns == 0: // Infinite
		return true, time.Time{}
	default:
		return false, d.ForceDeadline
	}
}

// Equal reports whether two drain strategies carry identical values. Both nil
// compares equal; nil against non-nil compares unequal.
func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil && o == nil {
		return true
	} else if o != nil && d == nil {
		return false
	} else if d != nil && o == nil {
		return false
	}

	// Compare values
	// NOTE(review): ForceDeadline is compared with != rather than
	// time.Time.Equal, so two representations of the same instant (e.g.
	// differing monotonic clock readings) compare unequal — confirm this
	// strictness is intended.
	if d.ForceDeadline != o.ForceDeadline {
		return false
	} else if d.Deadline != o.Deadline {
		return false
	} else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
		return false
	}

	return true
}

const (
	// DrainStatuses are the various states a drain can be in, as reflected in DrainMetadata
	DrainStatusDraining DrainStatus = "draining"
	DrainStatusComplete DrainStatus = "complete"
	DrainStatusCanceled DrainStatus = "canceled"
)

// DrainStatus describes the state of a drain operation.
type DrainStatus string

// DrainMetadata contains information about the most recent drain operation for a given Node.
type DrainMetadata struct {
	// StartedAt is the time that the drain operation started. This is equal to Node.DrainStrategy.StartedAt,
	// if it exists
	StartedAt time.Time

	// UpdatedAt is the time that this struct was most recently updated, either via API action
	// or drain completion
	UpdatedAt time.Time

	// Status reflects the status of the drain operation.
	Status DrainStatus

	// AccessorID is the accessor ID of the ACL token used in the most recent API operation against this drain
	AccessorID string

	// Meta includes the operator-submitted metadata about this drain operation
	Meta map[string]string
}

// Copy returns a deep copy of the drain metadata, or nil if the receiver is
// nil.
func (m *DrainMetadata) Copy() *DrainMetadata {
	if m == nil {
		return nil
	}
	c := new(DrainMetadata)
	*c = *m
	c.Meta = helper.CopyMapStringString(m.Meta)
	return c
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// NodeResources captures the available resources on the client.
	NodeResources *NodeResources

	// ReservedResources captures the set resources on the client that are
	// reserved from scheduling.
	ReservedResources *NodeReservedResources

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	// COMPAT(0.10): Remove after 0.10
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	// COMPAT(0.10): Remove after 0.10
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// DrainStrategy determines the node's draining behavior.
	// Will be non-nil only while draining.
	DrainStrategy *DrainStrategy

	// SchedulingEligibility determines whether this node will receive new
	// placements.
	SchedulingEligibility string

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Events is the most recent set of events generated for the node,
	// retaining only MaxRetainedNodeEvents number at a time
	Events []*NodeEvent

	// Drivers is a map of driver names to current driver information
	Drivers map[string]*DriverInfo

	// CSIControllerPlugins is a map of plugin names to current CSI Plugin info
	CSIControllerPlugins map[string]*CSIInfo
	// CSINodePlugins is a map of plugin names to current CSI Plugin info
	CSINodePlugins map[string]*CSIInfo

	// HostVolumes is a map of host volume names to their configuration
	HostVolumes map[string]*ClientHostVolumeConfig

	// LastDrain contains metadata about the most recent drain operation
	LastDrain *DrainMetadata

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Sanitize returns a copy of the Node omitting confidential fields
// It only returns a copy if the Node contains the confidential fields
func (n *Node) Sanitize() *Node {
	if n == nil {
		return nil
	}
	if n.SecretID == "" {
		return n
	}
	clean := n.Copy()
	clean.SecretID = ""
	return clean
}

// Ready returns true if the node is ready for running allocations
func (n *Node) Ready() bool {
	return n.Status == NodeStatusReady && n.DrainStrategy == nil && n.SchedulingEligibility == NodeSchedulingEligible
}

// Canonicalize normalizes a node in place, deriving eligibility from the
// drain state and upgrading pre-0.12 network resources to the node-specific
// network resource struct.
func (n *Node) Canonicalize() {
	if n == nil {
		return
	}

	// Ensure SchedulingEligibility is correctly set whenever draining so the plan applier and other scheduling logic
	// only need to check SchedulingEligibility when determining whether a placement is feasible on a node.
	if n.DrainStrategy != nil {
		n.SchedulingEligibility = NodeSchedulingIneligible
	} else if n.SchedulingEligibility == "" {
		n.SchedulingEligibility = NodeSchedulingEligible
	}

	// COMPAT remove in 1.0
	// In v0.12.0 we introduced a separate node specific network resource struct
	// so we need to convert any pre 0.12 clients to the correct struct
	if n.NodeResources != nil && n.NodeResources.NodeNetworks == nil {
		if n.NodeResources.Networks != nil {
			for _, nr := range n.NodeResources.Networks {
				nnr := &NodeNetworkResource{
					Mode:   nr.Mode,
					Speed:  nr.MBits,
					Device: nr.Device,
				}
				if nr.IP != "" {
					nnr.Addresses = []NodeNetworkAddress{
						{
							Alias:   "default",
							Address: nr.IP,
						},
					}
				}
				n.NodeResources.NodeNetworks = append(n.NodeResources.NodeNetworks, nnr)
			}
		}
	}
}

// Copy returns a deep copy of the node, or nil if the receiver is nil.
func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	nn := new(Node)
	*nn = *n
	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
	nn.Resources = nn.Resources.Copy()
	nn.Reserved = nn.Reserved.Copy()
	nn.NodeResources = nn.NodeResources.Copy()
	nn.ReservedResources = nn.ReservedResources.Copy()
	nn.Links = helper.CopyMapStringString(nn.Links)
	nn.Meta = helper.CopyMapStringString(nn.Meta)
	nn.Events = copyNodeEvents(n.Events)
	nn.DrainStrategy = nn.DrainStrategy.Copy()
	nn.LastDrain = nn.LastDrain.Copy()
	nn.CSIControllerPlugins = copyNodeCSI(nn.CSIControllerPlugins)
	nn.CSINodePlugins = copyNodeCSI(nn.CSINodePlugins)
	nn.Drivers = copyNodeDrivers(n.Drivers)
	nn.HostVolumes = copyNodeHostVolumes(n.HostVolumes)
	return nn
}

// copyNodeEvents is a helper to copy a list of NodeEvent's
func copyNodeEvents(events []*NodeEvent) []*NodeEvent {
	l := len(events)
	if l == 0 {
		return nil
	}

	c := make([]*NodeEvent, l)
	for i, event := range events {
		c[i] = event.Copy()
	}
	return c
}

// copyNodeCSI is a helper to copy a map of CSIInfo
func copyNodeCSI(plugins map[string]*CSIInfo) map[string]*CSIInfo {
	l := len(plugins)
	if l == 0 {
		return nil
	}

	c := make(map[string]*CSIInfo, l)
	for plugin, info := range plugins {
		c[plugin] = info.Copy()
	}

	return c
}

// copyNodeDrivers is a helper to copy a map of DriverInfo
func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
	l := len(drivers)
	if l == 0 {
		return nil
	}

	c := make(map[string]*DriverInfo, l)
	for driver, info := range drivers {
		c[driver] = info.Copy()
	}
	return c
}

// copyNodeHostVolumes is a helper to copy a map of string to Volume
func copyNodeHostVolumes(volumes map[string]*ClientHostVolumeConfig) map[string]*ClientHostVolumeConfig {
	l := len(volumes)
	if l == 0 {
		return nil
	}

	c := make(map[string]*ClientHostVolumeConfig, l)
	for volume, v := range volumes {
		c[volume] = v.Copy()
	}

	return c
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func (n *Node) TerminalStatus() bool {
	switch n.Status {
	case NodeStatusDown:
		return true
	default:
		return false
	}
}

// COMPAT(0.11): Remove in 0.11
// ComparableReservedResources returns the reserved resources on the node
// handling upgrade paths. Reserved networks must be handled separately. After
// 0.11 calls to this should be replaced with:
// node.ReservedResources.Comparable()
func (n *Node) ComparableReservedResources() *ComparableResources {
	// See if we can no-op
	if n.Reserved == nil && n.ReservedResources == nil {
		return nil
	}

	// Node already has 0.9+ behavior
	if n.ReservedResources != nil {
		return n.ReservedResources.Comparable()
	}

	// Upgrade path
	return &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: int64(n.Reserved.CPU),
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: int64(n.Reserved.MemoryMB),
			},
		},
		Shared: AllocatedSharedResources{
			DiskMB: int64(n.Reserved.DiskMB),
		},
	}
}

// COMPAT(0.11): Remove in 0.11
// ComparableResources returns the resources on the node
// handling upgrade paths. Networking must be handled separately. After 0.11
// calls to this should be replaced with: node.NodeResources.Comparable()
func (n *Node) ComparableResources() *ComparableResources {
	// Node already has 0.9+ behavior
	if n.NodeResources != nil {
		return n.NodeResources.Comparable()
	}

	// Upgrade path
	return &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: int64(n.Resources.CPU),
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: int64(n.Resources.MemoryMB),
			},
		},
		Shared: AllocatedSharedResources{
			DiskMB: int64(n.Resources.DiskMB),
		},
	}
}

// Stub returns a summarized version of the node
func (n *Node) Stub(fields *NodeStubFields) *NodeListStub {

	addr, _, _ := net.SplitHostPort(n.HTTPAddr)

	s := &NodeListStub{
		Address:               addr,
		ID:                    n.ID,
		Datacenter:            n.Datacenter,
		Name:                  n.Name,
		NodeClass:             n.NodeClass,
		Version:               n.Attributes["nomad.version"],
		Drain:                 n.DrainStrategy != nil,
		SchedulingEligibility: n.SchedulingEligibility,
		Status:                n.Status,
		StatusDescription:     n.StatusDescription,
		Drivers:               n.Drivers,
		HostVolumes:           n.HostVolumes,
		LastDrain:             n.LastDrain,
		CreateIndex:           n.CreateIndex,
		ModifyIndex:           n.ModifyIndex,
	}

	if fields != nil {
		if fields.Resources {
			s.NodeResources = n.NodeResources
			s.ReservedResources = n.ReservedResources
		}
	}

	return s
}

// NodeListStub is used to return a subset of node information
// for the node list
type NodeListStub struct {
	Address               string
	ID                    string
	Datacenter            string
	Name                  string
	NodeClass             string
	Version               string
	Drain                 bool
	SchedulingEligibility string
	Status                string
	StatusDescription     string
	Drivers               map[string]*DriverInfo
	HostVolumes           map[string]*ClientHostVolumeConfig
	NodeResources         *NodeResources         `json:",omitempty"`
	ReservedResources     *NodeReservedResources `json:",omitempty"`
	LastDrain             *DrainMetadata
	CreateIndex           uint64
	ModifyIndex           uint64
}

// NodeStubFields defines which fields are included in the NodeListStub.
type NodeStubFields struct {
	Resources bool
}

// Resources is used to define the resources available
// on a client
type Resources struct {
	CPU         int
	Cores       int
	MemoryMB    int
	MemoryMaxMB int
	DiskMB      int
	IOPS        int // COMPAT(0.10): Only being used to issue warnings
	Networks    Networks
	Devices     ResourceDevices
}

const (
	// BytesInMegabyte is the conversion factor from megabyte fields to bytes.
	BytesInMegabyte = 1024 * 1024
)

// DefaultResources is a small resources object that contains the
// default resources requests that we will provide to an object.
// --- THIS FUNCTION IS REPLICATED IN api/resources.go and should
// be kept in sync.
2205func DefaultResources() *Resources { 2206 return &Resources{ 2207 CPU: 100, 2208 Cores: 0, 2209 MemoryMB: 300, 2210 } 2211} 2212 2213// MinResources is a small resources object that contains the 2214// absolute minimum resources that we will provide to an object. 2215// This should not be confused with the defaults which are 2216// provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 2217// api/resources.go and should be kept in sync. 2218func MinResources() *Resources { 2219 return &Resources{ 2220 CPU: 1, 2221 Cores: 0, 2222 MemoryMB: 10, 2223 } 2224} 2225 2226// DiskInBytes returns the amount of disk resources in bytes. 2227func (r *Resources) DiskInBytes() int64 { 2228 return int64(r.DiskMB * BytesInMegabyte) 2229} 2230 2231func (r *Resources) Validate() error { 2232 var mErr multierror.Error 2233 2234 if r.Cores > 0 && r.CPU > 0 { 2235 mErr.Errors = append(mErr.Errors, errors.New("Task can only ask for 'cpu' or 'cores' resource, not both.")) 2236 } 2237 2238 if err := r.MeetsMinResources(); err != nil { 2239 mErr.Errors = append(mErr.Errors, err) 2240 } 2241 2242 // Ensure the task isn't asking for disk resources 2243 if r.DiskMB > 0 { 2244 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2245 } 2246 2247 for i, d := range r.Devices { 2248 if err := d.Validate(); err != nil { 2249 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err)) 2250 } 2251 } 2252 2253 if r.MemoryMaxMB != 0 && r.MemoryMaxMB < r.MemoryMB { 2254 mErr.Errors = append(mErr.Errors, fmt.Errorf("MemoryMaxMB value (%d) should be larger than MemoryMB value (%d)", r.MemoryMaxMB, r.MemoryMB)) 2255 } 2256 2257 return mErr.ErrorOrNil() 2258} 2259 2260// Merge merges this resource with another resource. 
2261// COMPAT(0.10): Remove in 0.10 2262func (r *Resources) Merge(other *Resources) { 2263 if other.CPU != 0 { 2264 r.CPU = other.CPU 2265 } 2266 if other.Cores != 0 { 2267 r.Cores = other.Cores 2268 } 2269 if other.MemoryMB != 0 { 2270 r.MemoryMB = other.MemoryMB 2271 } 2272 if other.MemoryMaxMB != 0 { 2273 r.MemoryMaxMB = other.MemoryMaxMB 2274 } 2275 if other.DiskMB != 0 { 2276 r.DiskMB = other.DiskMB 2277 } 2278 if len(other.Networks) != 0 { 2279 r.Networks = other.Networks 2280 } 2281 if len(other.Devices) != 0 { 2282 r.Devices = other.Devices 2283 } 2284} 2285 2286// COMPAT(0.10): Remove in 0.10 2287func (r *Resources) Equals(o *Resources) bool { 2288 if r == o { 2289 return true 2290 } 2291 if r == nil || o == nil { 2292 return false 2293 } 2294 return r.CPU == o.CPU && 2295 r.Cores == o.Cores && 2296 r.MemoryMB == o.MemoryMB && 2297 r.MemoryMaxMB == o.MemoryMaxMB && 2298 r.DiskMB == o.DiskMB && 2299 r.IOPS == o.IOPS && 2300 r.Networks.Equals(&o.Networks) && 2301 r.Devices.Equals(&o.Devices) 2302} 2303 2304// COMPAT(0.10): Remove in 0.10 2305// ResourceDevices are part of Resources 2306type ResourceDevices []*RequestedDevice 2307 2308// COMPAT(0.10): Remove in 0.10 2309// Equals ResourceDevices as set keyed by Name 2310func (d *ResourceDevices) Equals(o *ResourceDevices) bool { 2311 if d == o { 2312 return true 2313 } 2314 if d == nil || o == nil { 2315 return false 2316 } 2317 if len(*d) != len(*o) { 2318 return false 2319 } 2320 m := make(map[string]*RequestedDevice, len(*d)) 2321 for _, e := range *d { 2322 m[e.Name] = e 2323 } 2324 for _, oe := range *o { 2325 de, ok := m[oe.Name] 2326 if !ok || !de.Equals(oe) { 2327 return false 2328 } 2329 } 2330 return true 2331} 2332 2333// COMPAT(0.10): Remove in 0.10 2334func (r *Resources) Canonicalize() { 2335 // Ensure that an empty and nil slices are treated the same to avoid scheduling 2336 // problems since we use reflect DeepEquals. 
2337 if len(r.Networks) == 0 { 2338 r.Networks = nil 2339 } 2340 if len(r.Devices) == 0 { 2341 r.Devices = nil 2342 } 2343 2344 for _, n := range r.Networks { 2345 n.Canonicalize() 2346 } 2347} 2348 2349// MeetsMinResources returns an error if the resources specified are less than 2350// the minimum allowed. 2351// This is based on the minimums defined in the Resources type 2352// COMPAT(0.10): Remove in 0.10 2353func (r *Resources) MeetsMinResources() error { 2354 var mErr multierror.Error 2355 minResources := MinResources() 2356 if r.CPU < minResources.CPU && r.Cores == 0 { 2357 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 2358 } 2359 if r.MemoryMB < minResources.MemoryMB { 2360 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 2361 } 2362 return mErr.ErrorOrNil() 2363} 2364 2365// Copy returns a deep copy of the resources 2366func (r *Resources) Copy() *Resources { 2367 if r == nil { 2368 return nil 2369 } 2370 newR := new(Resources) 2371 *newR = *r 2372 2373 // Copy the network objects 2374 newR.Networks = r.Networks.Copy() 2375 2376 // Copy the devices 2377 if r.Devices != nil { 2378 n := len(r.Devices) 2379 newR.Devices = make([]*RequestedDevice, n) 2380 for i := 0; i < n; i++ { 2381 newR.Devices[i] = r.Devices[i].Copy() 2382 } 2383 } 2384 2385 return newR 2386} 2387 2388// NetIndex finds the matching net index using device name 2389// COMPAT(0.10): Remove in 0.10 2390func (r *Resources) NetIndex(n *NetworkResource) int { 2391 return r.Networks.NetIndex(n) 2392} 2393 2394// Add adds the resources of the delta to this, potentially 2395// returning an error if not possible. 
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Add(delta *Resources) {
	if delta == nil {
		return
	}

	r.CPU += delta.CPU
	r.MemoryMB += delta.MemoryMB
	if delta.MemoryMaxMB > 0 {
		r.MemoryMaxMB += delta.MemoryMaxMB
	} else {
		// A delta without an explicit memory max contributes its plain
		// memory amount to the max instead.
		r.MemoryMaxMB += delta.MemoryMB
	}
	r.DiskMB += delta.DiskMB

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := r.NetIndex(n)
		if idx == -1 {
			r.Networks = append(r.Networks, n.Copy())
		} else {
			r.Networks[idx].Add(n)
		}
	}
}

// GoString implements fmt.GoStringer for debug output.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) GoString() string {
	return fmt.Sprintf("*%#v", *r)
}

// NodeNetworkResource is used to describe a fingerprinted network of a node
type NodeNetworkResource struct {
	Mode string // host for physical networks, cni/<name> for cni networks

	// The following apply only to host networks
	Device     string // interface name
	MacAddress string
	Speed      int

	Addresses []NodeNetworkAddress // not valid for cni, for bridge there will only be 1 ip
}

// Equals reports whether two node networks are deeply equal.
func (n *NodeNetworkResource) Equals(o *NodeNetworkResource) bool {
	return reflect.DeepEqual(n, o)
}

// HasAlias returns whether any address on this network carries the given
// alias.
func (n *NodeNetworkResource) HasAlias(alias string) bool {
	for _, addr := range n.Addresses {
		if addr.Alias == alias {
			return true
		}
	}
	return false
}

// NodeNetworkAF is the address family of a node network address.
type NodeNetworkAF string

const (
	NodeNetworkAF_IPv4 NodeNetworkAF = "ipv4"
	NodeNetworkAF_IPv6 NodeNetworkAF = "ipv6"
)

// NodeNetworkAddress is a single fingerprinted address on a node network.
type NodeNetworkAddress struct {
	Family        NodeNetworkAF
	Alias         string
	Address       string
	ReservedPorts string
	Gateway       string // default route for this address
}

// AllocatedPortMapping describes a single port assignment on a host.
type AllocatedPortMapping struct {
	Label  string
	Value  int
	To     int
	HostIP string
}

// AllocatedPorts is the set of port mappings assigned to an allocation.
type AllocatedPorts []AllocatedPortMapping

// Get returns the mapping with the given label and whether it was found.
func (p AllocatedPorts) Get(label string) (AllocatedPortMapping, bool) {
	for _, port := range p {
		if port.Label == label {
			return port, true
		}
	}

	return AllocatedPortMapping{}, false
}

type Port struct {
	// Label is the key for HCL port stanzas: port "foo" {}
	Label string

	// Value is the static or dynamic port value. For dynamic ports this
	// will be 0 in the jobspec and set by the scheduler.
	Value int

	// To is the port inside a network namespace where this port is
	// forwarded. -1 is an internal sentinel value used by Consul Connect
	// to mean "same as the host port."
	To int

	// HostNetwork is the name of the network this port should be assigned
	// to. Jobs with a HostNetwork set can only be placed on nodes with
	// that host network available.
	HostNetwork string
}

// DNSConfig describes the DNS settings applied to a task's network.
type DNSConfig struct {
	Servers  []string
	Searches []string
	Options  []string
}

// Copy returns a deep copy of the DNS configuration, or nil for a nil
// receiver. Note that nil slices become empty (non-nil) slices in the copy.
func (d *DNSConfig) Copy() *DNSConfig {
	if d == nil {
		return nil
	}
	newD := new(DNSConfig)
	newD.Servers = make([]string, len(d.Servers))
	copy(newD.Servers, d.Servers)
	newD.Searches = make([]string, len(d.Searches))
	copy(newD.Searches, d.Searches)
	newD.Options = make([]string, len(d.Options))
	copy(newD.Options, d.Options)
	return newD
}

// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Mode          string     // Mode of the network
	Device        string     // Name of the device
	CIDR          string     // CIDR block of addresses
	IP            string     // Host IP address
	MBits         int        // Throughput
	DNS           *DNSConfig // DNS Configuration
	ReservedPorts []Port     // Host Reserved ports
	DynamicPorts  []Port     // Host Dynamically assigned ports
}

// Hash returns a CRC32 checksum over the identifying fields and port lists.
// Port order matters: the index is folded into the hash.
func (nr *NetworkResource) Hash() uint32 {
	var data []byte
	data = append(data, []byte(fmt.Sprintf("%s%s%s%s%d", nr.Mode, nr.Device, nr.CIDR, nr.IP, nr.MBits))...)

	for i, port := range nr.ReservedPorts {
		data = append(data, []byte(fmt.Sprintf("r%d%s%d%d", i, port.Label, port.Value, port.To))...)
	}

	for i, port := range nr.DynamicPorts {
		data = append(data, []byte(fmt.Sprintf("d%d%s%d%d", i, port.Label, port.Value, port.To))...)
	}

	return crc32.ChecksumIEEE(data)
}

// Equals reports equality by comparing hashes; see Hash for the fields
// covered (DNS is not included).
func (nr *NetworkResource) Equals(other *NetworkResource) bool {
	return nr.Hash() == other.Hash()
}

// Canonicalize normalizes empty port slices to nil and defaults each port's
// HostNetwork to "default".
func (n *NetworkResource) Canonicalize() {
	// Ensure that an empty and nil slices are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(n.ReservedPorts) == 0 {
		n.ReservedPorts = nil
	}
	if len(n.DynamicPorts) == 0 {
		n.DynamicPorts = nil
	}

	for i, p := range n.DynamicPorts {
		if p.HostNetwork == "" {
			n.DynamicPorts[i].HostNetwork = "default"
		}
	}
	for i, p := range n.ReservedPorts {
		if p.HostNetwork == "" {
			n.ReservedPorts[i].HostNetwork = "default"
		}
	}
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	if n == nil {
		return nil
	}
	newR := new(NetworkResource)
	*newR = *n
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}

// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

// GoString implements fmt.GoStringer for debug output.
func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

// PortLabels returns a map of port labels to their assigned host ports.
func (n *NetworkResource) PortLabels() map[string]int {
	num := len(n.ReservedPorts) + len(n.DynamicPorts)
	labelValues := make(map[string]int, num)
	for _, port := range n.ReservedPorts {
		labelValues[port.Label] = port.Value
	}
	for _, port := range n.DynamicPorts {
		labelValues[port.Label] = port.Value
	}
	return labelValues
}

// Networks defined for a task on the Resources struct.
type Networks []*NetworkResource

// Copy returns a deep copy of the networks; an empty set copies to nil.
func (ns Networks) Copy() Networks {
	if len(ns) == 0 {
		return nil
	}

	out := make([]*NetworkResource, len(ns))
	for i := range ns {
		out[i] = ns[i].Copy()
	}
	return out
}

// Port assignment and IP for the given label or empty values.
func (ns Networks) Port(label string) AllocatedPortMapping {
	for _, n := range ns {
		for _, p := range n.ReservedPorts {
			if p.Label == label {
				return AllocatedPortMapping{
					Label:  label,
					Value:  p.Value,
					To:     p.To,
					HostIP: n.IP,
				}
			}
		}
		for _, p := range n.DynamicPorts {
			if p.Label == label {
				return AllocatedPortMapping{
					Label:  label,
					Value:  p.Value,
					To:     p.To,
					HostIP: n.IP,
				}
			}
		}
	}
	// Not found: return the zero value.
	return AllocatedPortMapping{}
}

// NetIndex returns the index of the network with the same device name, or
// -1 when no network matches.
func (ns Networks) NetIndex(n *NetworkResource) int {
	for idx, net := range ns {
		if net.Device == n.Device {
			return idx
		}
	}
	return -1
}

// RequestedDevice is used to request a device for a task.
type RequestedDevice struct {
	// Name is the request name. The possible values are as follows:
	// * <type>: A single value only specifies the type of request.
	// * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified.
	// * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified.
	//
	// Examples are as follows:
	// * "gpu"
	// * "nvidia/gpu"
	// * "nvidia/gpu/GTX2080Ti"
	Name string

	// Count is the number of requested devices
	Count uint64

	// Constraints are a set of constraints to apply when selecting the device
	// to use.
	Constraints Constraints

	// Affinities are a set of affinities to apply when selecting the device
	// to use.
	Affinities Affinities
}

// Equals reports whether two device requests are identical.
func (r *RequestedDevice) Equals(o *RequestedDevice) bool {
	if r == o {
		return true
	}
	if r == nil || o == nil {
		return false
	}
	return r.Name == o.Name &&
		r.Count == o.Count &&
		r.Constraints.Equals(&o.Constraints) &&
		r.Affinities.Equals(&o.Affinities)
}

// Copy returns a deep copy of the device request.
func (r *RequestedDevice) Copy() *RequestedDevice {
	if r == nil {
		return nil
	}

	nr := *r
	nr.Constraints = CopySliceConstraints(nr.Constraints)
	nr.Affinities = CopySliceAffinities(nr.Affinities)

	return &nr
}

// ID parses Name into its identifying tuple. Depending on how many
// slash-separated parts Name has, it fills Type; Vendor+Type; or
// Vendor+Type+Name. Returns nil for a nil receiver or empty Name.
func (r *RequestedDevice) ID() *DeviceIdTuple {
	if r == nil || r.Name == "" {
		return nil
	}

	parts := strings.SplitN(r.Name, "/", 3)
	switch len(parts) {
	case 1:
		return &DeviceIdTuple{
			Type: parts[0],
		}
	case 2:
		return &DeviceIdTuple{
			Vendor: parts[0],
			Type:   parts[1],
		}
	default:
		return &DeviceIdTuple{
			Vendor: parts[0],
			Type:   parts[1],
			Name:   parts[2],
		}
	}
}

// Validate checks that the request has a name and that its constraints and
// affinities are valid. Distinct-hosts/distinct-property constraint
// operands are not supported for devices.
func (r *RequestedDevice) Validate() error {
	if r == nil {
		return nil
	}

	var mErr multierror.Error
	if r.Name == "" {
		_ = multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name"))
	}

	for idx, constr := range r.Constraints {
		// Ensure that the constraint doesn't use an operand we do not allow
		switch constr.Operand {
		case ConstraintDistinctHosts, ConstraintDistinctProperty:
			outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand)
			_ = multierror.Append(&mErr, outer)
		default:
			if err := constr.Validate(); err != nil {
				outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
				_ = multierror.Append(&mErr, outer)
			}
		}
	}
	for idx, affinity := range r.Affinities {
		if err := affinity.Validate(); err != nil {
			outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
			_ = multierror.Append(&mErr, outer)
		}
	}

	return mErr.ErrorOrNil()
}

// NodeResources is used to define the resources available on a client node.
type NodeResources struct {
	Cpu          NodeCpuResources
	Memory       NodeMemoryResources
	Disk         NodeDiskResources
	Networks     Networks
	NodeNetworks []*NodeNetworkResource
	Devices      []*NodeDeviceResource
}

// Copy returns a deep copy of the node resources.
func (n *NodeResources) Copy() *NodeResources {
	if n == nil {
		return nil
	}

	newN := new(NodeResources)
	*newN = *n

	// Copy the networks
	newN.Networks = n.Networks.Copy()

	// Copy the devices
	if n.Devices != nil {
		devices := len(n.Devices)
		newN.Devices = make([]*NodeDeviceResource, devices)
		for i := 0; i < devices; i++ {
			newN.Devices[i] = n.Devices[i].Copy()
		}
	}

	return newN
}

// Comparable returns a comparable version of the nodes resources. This
// conversion can be lossy so care must be taken when using it.
func (n *NodeResources) Comparable() *ComparableResources {
	if n == nil {
		return nil
	}

	c := &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares:     n.Cpu.CpuShares,
				ReservedCores: n.Cpu.ReservableCpuCores,
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: n.Memory.MemoryMB,
			},
			Networks: n.Networks,
		},
		Shared: AllocatedSharedResources{
			DiskMB: n.Disk.DiskMB,
		},
	}
	return c
}

// Merge overlays o onto n: CPU/memory/disk merge field-wise, networks are
// appended, devices are replaced wholesale, and node networks are merged
// by device name (matching entries replaced, new ones appended).
func (n *NodeResources) Merge(o *NodeResources) {
	if o == nil {
		return
	}

	n.Cpu.Merge(&o.Cpu)
	n.Memory.Merge(&o.Memory)
	n.Disk.Merge(&o.Disk)

	if len(o.Networks) != 0 {
		n.Networks = append(n.Networks, o.Networks...)
	}

	if len(o.Devices) != 0 {
		n.Devices = o.Devices
	}

	if len(o.NodeNetworks) != 0 {
		// lookupNetwork returns the index and value of the network with the
		// given device name, or (0, nil) when absent — callers must test the
		// pointer, not the index.
		lookupNetwork := func(nets []*NodeNetworkResource, name string) (int, *NodeNetworkResource) {
			for i, nw := range nets {
				if nw.Device == name {
					return i, nw
				}
			}
			return 0, nil
		}

		for _, nw := range o.NodeNetworks {
			if i, nnw := lookupNetwork(n.NodeNetworks, nw.Device); nnw != nil {
				n.NodeNetworks[i] = nw
			} else {
				n.NodeNetworks = append(n.NodeNetworks, nw)
			}
		}
	}
}

// Equals reports whether two node resources are identical across CPU,
// memory, disk, networks, devices, and node networks.
func (n *NodeResources) Equals(o *NodeResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if !n.Cpu.Equals(&o.Cpu) {
		return false
	}
	if !n.Memory.Equals(&o.Memory) {
		return false
	}
	if !n.Disk.Equals(&o.Disk) {
		return false
	}
	if !n.Networks.Equals(&o.Networks) {
		return false
	}

	// Check the devices
	if !DevicesEquals(n.Devices, o.Devices) {
		return false
	}

	if !NodeNetworksEquals(n.NodeNetworks, o.NodeNetworks) {
		return false
	}

	return true
}

// Equals equates Networks as a set
func (ns *Networks) Equals(o *Networks) bool {
	if ns == o {
		return true
	}
	if ns == nil || o == nil {
		return false
	}
	if len(*ns) != len(*o) {
		return false
	}
	// Every element of ns must have an equal element in o; combined with
	// the length check this gives set equality.
SETEQUALS:
	for _, ne := range *ns {
		for _, oe := range *o {
			if ne.Equals(oe) {
				continue SETEQUALS
			}
		}
		return false
	}
	return true
}

// DevicesEquals returns true if the two device arrays are set equal
func DevicesEquals(d1, d2 []*NodeDeviceResource) bool {
	if len(d1) != len(d2) {
		return false
	}
	idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1))
	for _, d := range d1 {
		idMap[*d.ID()] = d
	}
	for _, otherD := range d2 {
		if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) {
			return false
		}
	}

	return true
}

// NodeNetworksEquals returns true if the two node network arrays are set
// equal, keyed by device name.
func NodeNetworksEquals(n1, n2 []*NodeNetworkResource) bool {
	if len(n1) != len(n2) {
		return false
	}

	netMap := make(map[string]*NodeNetworkResource, len(n1))
	for _, n := range n1 {
		netMap[n.Device] = n
	}
	for _, otherN := range n2 {
		if n, ok := netMap[otherN.Device]; !ok || !n.Equals(otherN) {
			return false
		}
	}

	return true
}

// NodeCpuResources captures the CPU resources of the node.
type NodeCpuResources struct {
	// CpuShares is the CPU shares available. This is calculated by number of
	// cores multiplied by the core frequency.
	CpuShares int64

	// TotalCpuCores is the total number of cores on the machine. This includes cores not in
	// the agent's cpuset if on a linux platform
	TotalCpuCores uint16

	// ReservableCpuCores is the set of cpus which are available to be reserved on the Node.
	// This value is currently only reported on Linux platforms which support cgroups and is
	// discovered by inspecting the cpuset of the agent's cgroup.
	ReservableCpuCores []uint16
}

// Merge overlays the non-zero fields of o onto n.
func (n *NodeCpuResources) Merge(o *NodeCpuResources) {
	if o == nil {
		return
	}

	if o.CpuShares != 0 {
		n.CpuShares = o.CpuShares
	}

	if o.TotalCpuCores != 0 {
		n.TotalCpuCores = o.TotalCpuCores
	}

	if len(o.ReservableCpuCores) != 0 {
		n.ReservableCpuCores = o.ReservableCpuCores
	}
}

// Equals reports whether two CPU resources are identical, comparing
// ReservableCpuCores element-wise (order matters).
func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.CpuShares != o.CpuShares {
		return false
	}

	if n.TotalCpuCores != o.TotalCpuCores {
		return false
	}

	if len(n.ReservableCpuCores) != len(o.ReservableCpuCores) {
		return false
	}
	for i := range n.ReservableCpuCores {
		if n.ReservableCpuCores[i] != o.ReservableCpuCores[i] {
			return false
		}
	}
	return true
}

// SharesPerCore returns the average CPU shares per core.
// NOTE(review): this divides by TotalCpuCores and will panic if it is zero —
// presumably fingerprinting always populates it; confirm against callers.
func (n *NodeCpuResources) SharesPerCore() int64 {
	return n.CpuShares / int64(n.TotalCpuCores)
}

// NodeMemoryResources captures the memory resources of the node
type NodeMemoryResources struct {
	// MemoryMB is the total available memory on the node
	MemoryMB int64
}

// Merge overlays the non-zero fields of o onto n.
func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) {
	if o == nil {
		return
	}

	if o.MemoryMB != 0 {
		n.MemoryMB = o.MemoryMB
	}
}

// Equals reports whether two memory resources are identical.
func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.MemoryMB != o.MemoryMB {
		return false
	}

	return true
}

// NodeDiskResources captures the disk resources of the node
type NodeDiskResources struct {
	// DiskMB is the total available disk space on the node
	DiskMB int64
}

// Merge overlays the non-zero fields of o onto n.
func (n *NodeDiskResources) Merge(o *NodeDiskResources) {
	if o == nil {
		return
	}
	if o.DiskMB != 0 {
		n.DiskMB = o.DiskMB
	}
}

// Equals reports whether two disk resources are identical.
func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.DiskMB != o.DiskMB {
		return false
	}

	return true
}

// DeviceIdTuple is the tuple that identifies a device
type DeviceIdTuple struct {
	Vendor string
	Type   string
	Name   string
}

// String renders the tuple as "vendor/type/name"; a nil tuple renders as "".
func (d *DeviceIdTuple) String() string {
	if d == nil {
		return ""
	}

	return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name)
}

// Matches returns if this Device ID is a superset of the passed ID.
func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool {
	if other == nil {
		return false
	}

	// Empty fields on other act as wildcards.
	if other.Name != "" && other.Name != id.Name {
		return false
	}

	if other.Vendor != "" && other.Vendor != id.Vendor {
		return false
	}

	if other.Type != "" && other.Type != id.Type {
		return false
	}

	return true
}

// Equals returns if this Device ID is the same as the passed ID.
func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool {
	if id == nil && o == nil {
		return true
	} else if id == nil || o == nil {
		return false
	}

	return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name
}

// NodeDeviceResource captures a set of devices sharing a common
// vendor/type/device_name tuple.
3150type NodeDeviceResource struct { 3151 Vendor string 3152 Type string 3153 Name string 3154 Instances []*NodeDevice 3155 Attributes map[string]*psstructs.Attribute 3156} 3157 3158func (n *NodeDeviceResource) ID() *DeviceIdTuple { 3159 if n == nil { 3160 return nil 3161 } 3162 3163 return &DeviceIdTuple{ 3164 Vendor: n.Vendor, 3165 Type: n.Type, 3166 Name: n.Name, 3167 } 3168} 3169 3170func (n *NodeDeviceResource) Copy() *NodeDeviceResource { 3171 if n == nil { 3172 return nil 3173 } 3174 3175 // Copy the primitives 3176 nn := *n 3177 3178 // Copy the device instances 3179 if l := len(nn.Instances); l != 0 { 3180 nn.Instances = make([]*NodeDevice, 0, l) 3181 for _, d := range n.Instances { 3182 nn.Instances = append(nn.Instances, d.Copy()) 3183 } 3184 } 3185 3186 // Copy the Attributes 3187 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes) 3188 3189 return &nn 3190} 3191 3192func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool { 3193 if o == nil && n == nil { 3194 return true 3195 } else if o == nil { 3196 return false 3197 } else if n == nil { 3198 return false 3199 } 3200 3201 if n.Vendor != o.Vendor { 3202 return false 3203 } else if n.Type != o.Type { 3204 return false 3205 } else if n.Name != o.Name { 3206 return false 3207 } 3208 3209 // Check the attributes 3210 if len(n.Attributes) != len(o.Attributes) { 3211 return false 3212 } 3213 for k, v := range n.Attributes { 3214 if otherV, ok := o.Attributes[k]; !ok || v != otherV { 3215 return false 3216 } 3217 } 3218 3219 // Check the instances 3220 if len(n.Instances) != len(o.Instances) { 3221 return false 3222 } 3223 idMap := make(map[string]*NodeDevice, len(n.Instances)) 3224 for _, d := range n.Instances { 3225 idMap[d.ID] = d 3226 } 3227 for _, otherD := range o.Instances { 3228 if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) { 3229 return false 3230 } 3231 } 3232 3233 return true 3234} 3235 3236// NodeDevice is an instance of a particular device. 
3237type NodeDevice struct { 3238 // ID is the ID of the device. 3239 ID string 3240 3241 // Healthy captures whether the device is healthy. 3242 Healthy bool 3243 3244 // HealthDescription is used to provide a human readable description of why 3245 // the device may be unhealthy. 3246 HealthDescription string 3247 3248 // Locality stores HW locality information for the node to optionally be 3249 // used when making placement decisions. 3250 Locality *NodeDeviceLocality 3251} 3252 3253func (n *NodeDevice) Equals(o *NodeDevice) bool { 3254 if o == nil && n == nil { 3255 return true 3256 } else if o == nil { 3257 return false 3258 } else if n == nil { 3259 return false 3260 } 3261 3262 if n.ID != o.ID { 3263 return false 3264 } else if n.Healthy != o.Healthy { 3265 return false 3266 } else if n.HealthDescription != o.HealthDescription { 3267 return false 3268 } else if !n.Locality.Equals(o.Locality) { 3269 return false 3270 } 3271 3272 return false 3273} 3274 3275func (n *NodeDevice) Copy() *NodeDevice { 3276 if n == nil { 3277 return nil 3278 } 3279 3280 // Copy the primitives 3281 nn := *n 3282 3283 // Copy the locality 3284 nn.Locality = nn.Locality.Copy() 3285 3286 return &nn 3287} 3288 3289// NodeDeviceLocality stores information about the devices hardware locality on 3290// the node. 3291type NodeDeviceLocality struct { 3292 // PciBusID is the PCI Bus ID for the device. 
3293 PciBusID string 3294} 3295 3296func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool { 3297 if o == nil && n == nil { 3298 return true 3299 } else if o == nil { 3300 return false 3301 } else if n == nil { 3302 return false 3303 } 3304 3305 if n.PciBusID != o.PciBusID { 3306 return false 3307 } 3308 3309 return true 3310} 3311 3312func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality { 3313 if n == nil { 3314 return nil 3315 } 3316 3317 // Copy the primitives 3318 nn := *n 3319 return &nn 3320} 3321 3322// NodeReservedResources is used to capture the resources on a client node that 3323// should be reserved and not made available to jobs. 3324type NodeReservedResources struct { 3325 Cpu NodeReservedCpuResources 3326 Memory NodeReservedMemoryResources 3327 Disk NodeReservedDiskResources 3328 Networks NodeReservedNetworkResources 3329} 3330 3331func (n *NodeReservedResources) Copy() *NodeReservedResources { 3332 if n == nil { 3333 return nil 3334 } 3335 newN := new(NodeReservedResources) 3336 *newN = *n 3337 return newN 3338} 3339 3340// Comparable returns a comparable version of the node's reserved resources. The 3341// returned resources doesn't contain any network information. This conversion 3342// can be lossy so care must be taken when using it. 3343func (n *NodeReservedResources) Comparable() *ComparableResources { 3344 if n == nil { 3345 return nil 3346 } 3347 3348 c := &ComparableResources{ 3349 Flattened: AllocatedTaskResources{ 3350 Cpu: AllocatedCpuResources{ 3351 CpuShares: n.Cpu.CpuShares, 3352 ReservedCores: n.Cpu.ReservedCpuCores, 3353 }, 3354 Memory: AllocatedMemoryResources{ 3355 MemoryMB: n.Memory.MemoryMB, 3356 }, 3357 }, 3358 Shared: AllocatedSharedResources{ 3359 DiskMB: n.Disk.DiskMB, 3360 }, 3361 } 3362 return c 3363} 3364 3365// NodeReservedCpuResources captures the reserved CPU resources of the node. 
3366type NodeReservedCpuResources struct { 3367 CpuShares int64 3368 ReservedCpuCores []uint16 3369} 3370 3371// NodeReservedMemoryResources captures the reserved memory resources of the node. 3372type NodeReservedMemoryResources struct { 3373 MemoryMB int64 3374} 3375 3376// NodeReservedDiskResources captures the reserved disk resources of the node. 3377type NodeReservedDiskResources struct { 3378 DiskMB int64 3379} 3380 3381// NodeReservedNetworkResources captures the reserved network resources of the node. 3382type NodeReservedNetworkResources struct { 3383 // ReservedHostPorts is the set of ports reserved on all host network 3384 // interfaces. Its format is a comma separate list of integers or integer 3385 // ranges. (80,443,1000-2000,2005) 3386 ReservedHostPorts string 3387} 3388 3389// ParsePortHostPorts returns the reserved host ports. 3390func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) { 3391 return ParsePortRanges(n.ReservedHostPorts) 3392} 3393 3394// AllocatedResources is the set of resources to be used by an allocation. 3395type AllocatedResources struct { 3396 // Tasks is a mapping of task name to the resources for the task. 3397 Tasks map[string]*AllocatedTaskResources 3398 TaskLifecycles map[string]*TaskLifecycleConfig 3399 3400 // Shared is the set of resource that are shared by all tasks in the group. 
3401 Shared AllocatedSharedResources 3402} 3403 3404func (a *AllocatedResources) Copy() *AllocatedResources { 3405 if a == nil { 3406 return nil 3407 } 3408 3409 out := AllocatedResources{ 3410 Shared: a.Shared.Copy(), 3411 } 3412 3413 if a.Tasks != nil { 3414 out.Tasks = make(map[string]*AllocatedTaskResources, len(out.Tasks)) 3415 for task, resource := range a.Tasks { 3416 out.Tasks[task] = resource.Copy() 3417 } 3418 } 3419 if a.TaskLifecycles != nil { 3420 out.TaskLifecycles = make(map[string]*TaskLifecycleConfig, len(out.TaskLifecycles)) 3421 for task, lifecycle := range a.TaskLifecycles { 3422 out.TaskLifecycles[task] = lifecycle.Copy() 3423 } 3424 3425 } 3426 3427 return &out 3428} 3429 3430// Comparable returns a comparable version of the allocations allocated 3431// resources. This conversion can be lossy so care must be taken when using it. 3432func (a *AllocatedResources) Comparable() *ComparableResources { 3433 if a == nil { 3434 return nil 3435 } 3436 3437 c := &ComparableResources{ 3438 Shared: a.Shared, 3439 } 3440 3441 prestartSidecarTasks := &AllocatedTaskResources{} 3442 prestartEphemeralTasks := &AllocatedTaskResources{} 3443 main := &AllocatedTaskResources{} 3444 poststopTasks := &AllocatedTaskResources{} 3445 3446 for taskName, r := range a.Tasks { 3447 lc := a.TaskLifecycles[taskName] 3448 if lc == nil { 3449 main.Add(r) 3450 } else if lc.Hook == TaskLifecycleHookPrestart { 3451 if lc.Sidecar { 3452 prestartSidecarTasks.Add(r) 3453 } else { 3454 prestartEphemeralTasks.Add(r) 3455 } 3456 } else if lc.Hook == TaskLifecycleHookPoststop { 3457 poststopTasks.Add(r) 3458 } 3459 } 3460 3461 // update this loop to account for lifecycle hook 3462 prestartEphemeralTasks.Max(main) 3463 prestartEphemeralTasks.Max(poststopTasks) 3464 prestartSidecarTasks.Add(prestartEphemeralTasks) 3465 c.Flattened.Add(prestartSidecarTasks) 3466 3467 // Add network resources that are at the task group level 3468 for _, network := range a.Shared.Networks { 3469 
c.Flattened.Add(&AllocatedTaskResources{ 3470 Networks: []*NetworkResource{network}, 3471 }) 3472 } 3473 3474 return c 3475} 3476 3477// OldTaskResources returns the pre-0.9.0 map of task resources 3478func (a *AllocatedResources) OldTaskResources() map[string]*Resources { 3479 m := make(map[string]*Resources, len(a.Tasks)) 3480 for name, res := range a.Tasks { 3481 m[name] = &Resources{ 3482 CPU: int(res.Cpu.CpuShares), 3483 MemoryMB: int(res.Memory.MemoryMB), 3484 MemoryMaxMB: int(res.Memory.MemoryMaxMB), 3485 Networks: res.Networks, 3486 } 3487 } 3488 3489 return m 3490} 3491 3492func (a *AllocatedResources) Canonicalize() { 3493 a.Shared.Canonicalize() 3494 3495 for _, r := range a.Tasks { 3496 for _, nw := range r.Networks { 3497 for _, port := range append(nw.DynamicPorts, nw.ReservedPorts...) { 3498 a.Shared.Ports = append(a.Shared.Ports, AllocatedPortMapping{ 3499 Label: port.Label, 3500 Value: port.Value, 3501 To: port.To, 3502 HostIP: nw.IP, 3503 }) 3504 } 3505 } 3506 } 3507} 3508 3509// AllocatedTaskResources are the set of resources allocated to a task. 
type AllocatedTaskResources struct {
	Cpu      AllocatedCpuResources
	Memory   AllocatedMemoryResources
	Networks Networks
	Devices  []*AllocatedDeviceResource
}

// Copy returns a deep copy of the task resources, or nil for a nil receiver.
func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources {
	if a == nil {
		return nil
	}
	newA := new(AllocatedTaskResources)
	*newA = *a

	// Copy the networks
	newA.Networks = a.Networks.Copy()

	// Copy the devices (newA.Devices still aliases a.Devices from the struct
	// copy above, so this check is equivalent to a.Devices != nil)
	if newA.Devices != nil {
		n := len(a.Devices)
		newA.Devices = make([]*AllocatedDeviceResource, n)
		for i := 0; i < n; i++ {
			newA.Devices[i] = a.Devices[i].Copy()
		}
	}

	return newA
}

// NetIndex finds the matching net index using device name
func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int {
	return a.Networks.NetIndex(n)
}

// Add folds delta into a: CPU and memory are summed, while networks and
// devices are merged into existing entries (or appended as copies when no
// matching entry exists). A nil delta is a no-op.
func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) {
	if delta == nil {
		return
	}

	a.Cpu.Add(&delta.Cpu)
	a.Memory.Add(&delta.Memory)

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := a.NetIndex(n)
		if idx == -1 {
			a.Networks = append(a.Networks, n.Copy())
		} else {
			a.Networks[idx].Add(n)
		}
	}

	for _, d := range delta.Devices {
		// Find the matching device
		idx := AllocatedDevices(a.Devices).Index(d)
		if idx == -1 {
			a.Devices = append(a.Devices, d.Copy())
		} else {
			a.Devices[idx].Add(d)
		}
	}
}

// Max raises a's CPU and memory to at least other's values. NOTE(review):
// networks and devices are Add-ed (accumulated) rather than max-ed here —
// presumably intentional since they have no scalar max, but confirm.
func (a *AllocatedTaskResources) Max(other *AllocatedTaskResources) {
	if other == nil {
		return
	}

	a.Cpu.Max(&other.Cpu)
	a.Memory.Max(&other.Memory)

	for _, n := range other.Networks {
		// Find the matching interface by IP or CIDR
		idx := a.NetIndex(n)
		if idx == -1 {
			a.Networks = append(a.Networks, n.Copy())
		} else {
			a.Networks[idx].Add(n)
		}
	}

	for _, d := range other.Devices {
		// Find the matching device
		idx := AllocatedDevices(a.Devices).Index(d)
		if idx == -1 {
			a.Devices = append(a.Devices, d.Copy())
		} else {
			a.Devices[idx].Add(d)
		}
	}
}

// Comparable turns AllocatedTaskResources into ComparableResources
// as a helper step in preemption
func (a *AllocatedTaskResources) Comparable() *ComparableResources {
	ret := &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares:     a.Cpu.CpuShares,
				ReservedCores: a.Cpu.ReservedCores,
			},
			Memory: AllocatedMemoryResources{
				MemoryMB:    a.Memory.MemoryMB,
				MemoryMaxMB: a.Memory.MemoryMaxMB,
			},
		},
	}
	ret.Flattened.Networks = append(ret.Flattened.Networks, a.Networks...)
	return ret
}

// Subtract only subtracts CPU and Memory resources. Network utilization
// is managed separately in NetworkIndex
func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) {
	if delta == nil {
		return
	}

	a.Cpu.Subtract(&delta.Cpu)
	a.Memory.Subtract(&delta.Memory)
}

// AllocatedSharedResources are the set of resources allocated to a task group.
type AllocatedSharedResources struct {
	Networks Networks
	DiskMB   int64
	Ports    AllocatedPorts
}

// Copy returns a copy of the shared resources. NOTE(review): Ports is copied
// by reference (the slice is shared with the original) — confirm callers
// treat it as read-only.
func (a AllocatedSharedResources) Copy() AllocatedSharedResources {
	return AllocatedSharedResources{
		Networks: a.Networks.Copy(),
		DiskMB:   a.DiskMB,
		Ports:    a.Ports,
	}
}

// Add appends delta's networks and sums disk. A nil delta is a no-op.
// Ports are not merged here.
func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) {
	if delta == nil {
		return
	}
	a.Networks = append(a.Networks, delta.Networks...)
	a.DiskMB += delta.DiskMB
}

// Subtract removes delta's networks (matched by pointer identity) and
// subtracts disk. A nil delta is a no-op.
func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) {
	if delta == nil {
		return
	}

	diff := map[*NetworkResource]bool{}
	for _, n := range delta.Networks {
		diff[n] = true
	}
	var nets Networks
	for _, n := range a.Networks {
		if _, ok := diff[n]; !ok {
			nets = append(nets, n)
		}
	}
	a.Networks = nets
	a.DiskMB -= delta.DiskMB
}

// Canonicalize back-fills the Ports list from the first network's port
// definitions when Ports was not populated (legacy allocations).
func (a *AllocatedSharedResources) Canonicalize() {
	if len(a.Networks) > 0 {
		if len(a.Networks[0].DynamicPorts)+len(a.Networks[0].ReservedPorts) > 0 && len(a.Ports) == 0 {
			for _, ports := range [][]Port{a.Networks[0].DynamicPorts, a.Networks[0].ReservedPorts} {
				for _, p := range ports {
					a.Ports = append(a.Ports, AllocatedPortMapping{
						Label:  p.Label,
						Value:  p.Value,
						To:     p.To,
						HostIP: a.Networks[0].IP,
					})
				}
			}
		}
	}
}

// AllocatedCpuResources captures the allocated CPU resources.
3693type AllocatedCpuResources struct { 3694 CpuShares int64 3695 ReservedCores []uint16 3696} 3697 3698func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 3699 if delta == nil { 3700 return 3701 } 3702 3703 a.CpuShares += delta.CpuShares 3704 3705 a.ReservedCores = cpuset.New(a.ReservedCores...).Union(cpuset.New(delta.ReservedCores...)).ToSlice() 3706} 3707 3708func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 3709 if delta == nil { 3710 return 3711 } 3712 3713 a.CpuShares -= delta.CpuShares 3714 a.ReservedCores = cpuset.New(a.ReservedCores...).Difference(cpuset.New(delta.ReservedCores...)).ToSlice() 3715} 3716 3717func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { 3718 if other == nil { 3719 return 3720 } 3721 3722 if other.CpuShares > a.CpuShares { 3723 a.CpuShares = other.CpuShares 3724 } 3725 3726 if len(other.ReservedCores) > len(a.ReservedCores) { 3727 a.ReservedCores = other.ReservedCores 3728 } 3729} 3730 3731// AllocatedMemoryResources captures the allocated memory resources. 
3732type AllocatedMemoryResources struct { 3733 MemoryMB int64 3734 MemoryMaxMB int64 3735} 3736 3737func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 3738 if delta == nil { 3739 return 3740 } 3741 3742 a.MemoryMB += delta.MemoryMB 3743 if delta.MemoryMaxMB != 0 { 3744 a.MemoryMaxMB += delta.MemoryMaxMB 3745 } else { 3746 a.MemoryMaxMB += delta.MemoryMB 3747 } 3748} 3749 3750func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 3751 if delta == nil { 3752 return 3753 } 3754 3755 a.MemoryMB -= delta.MemoryMB 3756 if delta.MemoryMaxMB != 0 { 3757 a.MemoryMaxMB -= delta.MemoryMaxMB 3758 } else { 3759 a.MemoryMaxMB -= delta.MemoryMB 3760 } 3761} 3762 3763func (a *AllocatedMemoryResources) Max(other *AllocatedMemoryResources) { 3764 if other == nil { 3765 return 3766 } 3767 3768 if other.MemoryMB > a.MemoryMB { 3769 a.MemoryMB = other.MemoryMB 3770 } 3771 if other.MemoryMaxMB > a.MemoryMaxMB { 3772 a.MemoryMaxMB = other.MemoryMaxMB 3773 } 3774} 3775 3776type AllocatedDevices []*AllocatedDeviceResource 3777 3778// Index finds the matching index using the passed device. If not found, -1 is 3779// returned. 3780func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 3781 if d == nil { 3782 return -1 3783 } 3784 3785 for i, o := range a { 3786 if o.ID().Equals(d.ID()) { 3787 return i 3788 } 3789 } 3790 3791 return -1 3792} 3793 3794// AllocatedDeviceResource captures a set of allocated devices. 3795type AllocatedDeviceResource struct { 3796 // Vendor, Type, and Name are used to select the plugin to request the 3797 // device IDs from. 
3798 Vendor string 3799 Type string 3800 Name string 3801 3802 // DeviceIDs is the set of allocated devices 3803 DeviceIDs []string 3804} 3805 3806func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 3807 if a == nil { 3808 return nil 3809 } 3810 3811 return &DeviceIdTuple{ 3812 Vendor: a.Vendor, 3813 Type: a.Type, 3814 Name: a.Name, 3815 } 3816} 3817 3818func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 3819 if delta == nil { 3820 return 3821 } 3822 3823 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 3824} 3825 3826func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { 3827 if a == nil { 3828 return a 3829 } 3830 3831 na := *a 3832 3833 // Copy the devices 3834 na.DeviceIDs = make([]string, len(a.DeviceIDs)) 3835 for i, id := range a.DeviceIDs { 3836 na.DeviceIDs[i] = id 3837 } 3838 3839 return &na 3840} 3841 3842// ComparableResources is the set of resources allocated to a task group but 3843// not keyed by Task, making it easier to compare. 3844type ComparableResources struct { 3845 Flattened AllocatedTaskResources 3846 Shared AllocatedSharedResources 3847} 3848 3849func (c *ComparableResources) Add(delta *ComparableResources) { 3850 if delta == nil { 3851 return 3852 } 3853 3854 c.Flattened.Add(&delta.Flattened) 3855 c.Shared.Add(&delta.Shared) 3856} 3857 3858func (c *ComparableResources) Subtract(delta *ComparableResources) { 3859 if delta == nil { 3860 return 3861 } 3862 3863 c.Flattened.Subtract(&delta.Flattened) 3864 c.Shared.Subtract(&delta.Shared) 3865} 3866 3867func (c *ComparableResources) Copy() *ComparableResources { 3868 if c == nil { 3869 return nil 3870 } 3871 newR := new(ComparableResources) 3872 *newR = *c 3873 return newR 3874} 3875 3876// Superset checks if one set of resources is a superset of another. This 3877// ignores network resources, and the NetworkIndex should be used for that. 
func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) {
	if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares {
		return false, "cpu"
	}

	// Only enforce core containment when the receiver actually reserves
	// cores; otherwise cores are not a limiting dimension.
	if len(c.Flattened.Cpu.ReservedCores) > 0 && !cpuset.New(c.Flattened.Cpu.ReservedCores...).IsSupersetOf(cpuset.New(other.Flattened.Cpu.ReservedCores...)) {
		return false, "cores"
	}
	if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB {
		return false, "memory"
	}
	if c.Shared.DiskMB < other.Shared.DiskMB {
		return false, "disk"
	}
	return true, ""
}

// NetIndex finds the matching net index using device name
func (c *ComparableResources) NetIndex(n *NetworkResource) int {
	return c.Flattened.Networks.NetIndex(n)
}

const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)

const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluation's and allocations are terminal
)

const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6

	// JobTrackedScalingEvents is the number of scaling events that are
	// kept for a single task group.
	JobTrackedScalingEvents = 20
)

// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
	Stop bool

	// Region is the Nomad region that handles scheduling this job
	Region string

	// Namespace is the namespace the job is submitted into.
	Namespace string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// Affinities can be specified at the job level to express
	// scheduling preferences that apply to all groups and tasks
	Affinities []*Affinity

	// Spread can be specified at the job level to express spreading
	// allocations across a desired attribute, such as datacenter
	Spreads []*Spread

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// See agent.ApiJobToStructJob
	// Update provides defaults for the TaskGroup Update stanzas
	Update UpdateStrategy

	// Multiregion holds the multiregion deployment configuration, if any.
	Multiregion *Multiregion

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// ParameterizedJob is used to specify the job as a parameterized job
	// for dispatching.
	ParameterizedJob *ParameterizedJobConfig

	// Dispatched is used to identify if the Job has been dispatched from a
	// parameterized job.
	Dispatched bool

	// Payload is the payload supplied when the job was dispatched.
	Payload []byte

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// ConsulToken is the Consul token that proves the submitter of the job has
	// access to the Service Identity policies associated with the job's
	// Consul Connect enabled services. This field is only used to transfer the
	// token and is not stored after Job submission.
	ConsulToken string

	// ConsulNamespace is the Consul namespace
	ConsulNamespace string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string

	// VaultNamespace is the Vault namespace
	VaultNamespace string

	// NomadTokenID is the Accessor ID of the ACL token (if any)
	// used to register this version of the job. Used by deploymentwatcher.
	NomadTokenID string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Stable marks a job as stable. Stability is only defined on "service" and
	// "system" jobs. The stability of a job will be set automatically as part
	// of a deployment and can be manually set via APIs. This field is updated
	// when the status of a corresponding deployment transitions to Failed
	// or Successful. This field is not meaningful for jobs that don't have an
	// update stanza.
	Stable bool

	// Version is a monotonically increasing version number that is incremented
	// on each job register.
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}

// NamespacedID returns the namespaced id useful for logging
func (j *Job) NamespacedID() *NamespacedID {
	return &NamespacedID{
		ID:        j.ID,
		Namespace: j.Namespace,
	}
}

// Canonicalize is used to canonicalize fields in the Job. This should be
// called when registering a Job.
4085func (j *Job) Canonicalize() { 4086 if j == nil { 4087 return 4088 } 4089 4090 // Ensure that an empty and nil map are treated the same to avoid scheduling 4091 // problems since we use reflect DeepEquals. 4092 if len(j.Meta) == 0 { 4093 j.Meta = nil 4094 } 4095 4096 // Ensure the job is in a namespace. 4097 if j.Namespace == "" { 4098 j.Namespace = DefaultNamespace 4099 } 4100 4101 for _, tg := range j.TaskGroups { 4102 tg.Canonicalize(j) 4103 } 4104 4105 if j.ParameterizedJob != nil { 4106 j.ParameterizedJob.Canonicalize() 4107 } 4108 4109 if j.Multiregion != nil { 4110 j.Multiregion.Canonicalize() 4111 } 4112 4113 if j.Periodic != nil { 4114 j.Periodic.Canonicalize() 4115 } 4116} 4117 4118// Copy returns a deep copy of the Job. It is expected that callers use recover. 4119// This job can panic if the deep copy failed as it uses reflection. 4120func (j *Job) Copy() *Job { 4121 if j == nil { 4122 return nil 4123 } 4124 nj := new(Job) 4125 *nj = *j 4126 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 4127 nj.Constraints = CopySliceConstraints(nj.Constraints) 4128 nj.Affinities = CopySliceAffinities(nj.Affinities) 4129 nj.Multiregion = nj.Multiregion.Copy() 4130 4131 if j.TaskGroups != nil { 4132 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 4133 for i, tg := range nj.TaskGroups { 4134 tgs[i] = tg.Copy() 4135 } 4136 nj.TaskGroups = tgs 4137 } 4138 4139 nj.Periodic = nj.Periodic.Copy() 4140 nj.Meta = helper.CopyMapStringString(nj.Meta) 4141 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 4142 return nj 4143} 4144 4145// Validate is used to check a job for reasonable configuration 4146func (j *Job) Validate() error { 4147 var mErr multierror.Error 4148 4149 if j.Region == "" && j.Multiregion == nil { 4150 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 4151 } 4152 if j.ID == "" { 4153 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 4154 } else if strings.Contains(j.ID, " ") { 4155 mErr.Errors = append(mErr.Errors, 
errors.New("Job ID contains a space")) 4156 } else if strings.Contains(j.ID, "\000") { 4157 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a null character")) 4158 } 4159 if j.Name == "" { 4160 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 4161 } else if strings.Contains(j.Name, "\000") { 4162 mErr.Errors = append(mErr.Errors, errors.New("Job Name contains a null character")) 4163 } 4164 if j.Namespace == "" { 4165 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 4166 } 4167 switch j.Type { 4168 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 4169 case "": 4170 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 4171 default: 4172 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 4173 } 4174 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 4175 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 4176 } 4177 if len(j.Datacenters) == 0 && !j.IsMultiregion() { 4178 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 4179 } else { 4180 for _, v := range j.Datacenters { 4181 if v == "" { 4182 mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string")) 4183 } 4184 } 4185 } 4186 if len(j.TaskGroups) == 0 { 4187 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 4188 } 4189 for idx, constr := range j.Constraints { 4190 if err := constr.Validate(); err != nil { 4191 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 4192 mErr.Errors = append(mErr.Errors, outer) 4193 } 4194 } 4195 if j.Type == JobTypeSystem { 4196 if j.Affinities != nil { 4197 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 4198 } 4199 } else { 4200 for idx, affinity := range j.Affinities { 4201 if err := affinity.Validate(); err != nil { 4202 outer := fmt.Errorf("Affinity %d 
validation failed: %s", idx+1, err) 4203 mErr.Errors = append(mErr.Errors, outer) 4204 } 4205 } 4206 } 4207 4208 if j.Type == JobTypeSystem { 4209 if j.Spreads != nil { 4210 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 4211 } 4212 } else { 4213 for idx, spread := range j.Spreads { 4214 if err := spread.Validate(); err != nil { 4215 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 4216 mErr.Errors = append(mErr.Errors, outer) 4217 } 4218 } 4219 } 4220 4221 // Check for duplicate task groups 4222 taskGroups := make(map[string]int) 4223 for idx, tg := range j.TaskGroups { 4224 if tg.Name == "" { 4225 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 4226 } else if existing, ok := taskGroups[tg.Name]; ok { 4227 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 4228 } else { 4229 taskGroups[tg.Name] = idx 4230 } 4231 4232 if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 { 4233 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 4234 } 4235 4236 if tg.StopAfterClientDisconnect != nil && *tg.StopAfterClientDisconnect != 0 { 4237 if *tg.StopAfterClientDisconnect > 0 && 4238 !(j.Type == JobTypeBatch || j.Type == JobTypeService) { 4239 mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect can only be set in batch and service jobs")) 4240 } else if *tg.StopAfterClientDisconnect < 0 { 4241 mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect must be a positive value")) 4242 } 4243 } 4244 4245 if j.Type == "system" && tg.Count > 1 { 4246 mErr.Errors = append(mErr.Errors, 4247 fmt.Errorf("Job task group %s has count %d. 
Count cannot exceed 1 with system scheduler", 4248 tg.Name, tg.Count)) 4249 } 4250 } 4251 4252 // Validate the task group 4253 for _, tg := range j.TaskGroups { 4254 if err := tg.Validate(j); err != nil { 4255 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 4256 mErr.Errors = append(mErr.Errors, outer) 4257 } 4258 } 4259 4260 // Validate periodic is only used with batch jobs. 4261 if j.IsPeriodic() && j.Periodic.Enabled { 4262 if j.Type != JobTypeBatch { 4263 mErr.Errors = append(mErr.Errors, 4264 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 4265 } 4266 4267 if err := j.Periodic.Validate(); err != nil { 4268 mErr.Errors = append(mErr.Errors, err) 4269 } 4270 } 4271 4272 if j.IsParameterized() { 4273 if j.Type != JobTypeBatch { 4274 mErr.Errors = append(mErr.Errors, 4275 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 4276 } 4277 4278 if err := j.ParameterizedJob.Validate(); err != nil { 4279 mErr.Errors = append(mErr.Errors, err) 4280 } 4281 } 4282 4283 if j.IsMultiregion() { 4284 if err := j.Multiregion.Validate(j.Type, j.Datacenters); err != nil { 4285 mErr.Errors = append(mErr.Errors, err) 4286 } 4287 } 4288 4289 return mErr.ErrorOrNil() 4290} 4291 4292// Warnings returns a list of warnings that may be from dubious settings or 4293// deprecation warnings. 
// Warnings returns a non-fatal multierror aggregating warnings from the job's
// task groups, plus job-level warnings such as partial AutoPromote usage.
func (j *Job) Warnings() error {
	var mErr multierror.Error

	// Check the groups
	ap := 0
	for _, tg := range j.TaskGroups {
		if err := tg.Warnings(j); err != nil {
			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
		// Count groups that opted into automatic canary promotion so we can
		// warn below when only a subset of groups did.
		if tg.Update != nil && tg.Update.AutoPromote {
			ap += 1
		}
	}

	// Check AutoPromote, should be all or none
	if ap > 0 && ap < len(j.TaskGroups) {
		err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion")
		mErr.Errors = append(mErr.Errors, err)
	}

	return mErr.ErrorOrNil()
}

// LookupTaskGroup finds a task group by name, returning nil when no group
// with that name exists.
func (j *Job) LookupTaskGroup(name string) *TaskGroup {
	for _, tg := range j.TaskGroups {
		if tg.Name == name {
			return tg
		}
	}
	return nil
}

// CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
// meta data for the task. When joining Job, Group and Task Meta, the precedence
// is by deepest scope (Task > Group > Job).
func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
	group := j.LookupTaskGroup(groupName)
	if group == nil {
		// NOTE(review): this returns the job's meta map itself rather than a
		// copy, unlike the merged path below which copies; confirm callers
		// treat the result as read-only.
		return j.Meta
	}

	var meta map[string]string

	task := group.LookupTask(taskName)
	if task != nil {
		meta = helper.CopyMapStringString(task.Meta)
	}

	if meta == nil {
		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
	}

	// Add the group specific meta
	for k, v := range group.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	// Add the job specific meta
	for k, v := range j.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	return meta
}

// Stopped returns if a job is stopped. A nil job counts as stopped.
func (j *Job) Stopped() bool {
	return j == nil || j.Stop
}

// HasUpdateStrategy returns if any task group in the job has an update strategy
func (j *Job) HasUpdateStrategy() bool {
	for _, tg := range j.TaskGroups {
		if !tg.Update.IsEmpty() {
			return true
		}
	}

	return false
}

// Stub is used to return a summary of the job, pairing it with the supplied
// (possibly nil) job summary.
func (j *Job) Stub(summary *JobSummary) *JobListStub {
	return &JobListStub{
		ID:                j.ID,
		Namespace:         j.Namespace,
		ParentID:          j.ParentID,
		Name:              j.Name,
		Datacenters:       j.Datacenters,
		Multiregion:       j.Multiregion,
		Type:              j.Type,
		Priority:          j.Priority,
		Periodic:          j.IsPeriodic(),
		ParameterizedJob:  j.IsParameterized(),
		Stop:              j.Stop,
		Status:            j.Status,
		StatusDescription: j.StatusDescription,
		CreateIndex:       j.CreateIndex,
		ModifyIndex:       j.ModifyIndex,
		JobModifyIndex:    j.JobModifyIndex,
		SubmitTime:        j.SubmitTime,
		JobSummary:        summary,
	}
}

// IsPeriodic returns whether a job is periodic.
func (j *Job) IsPeriodic() bool {
	return j.Periodic != nil
}

// IsPeriodicActive returns whether the job is an active periodic job that will
// create child jobs
func (j *Job) IsPeriodicActive() bool {
	return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized()
}

// IsParameterized returns whether a job is parameterized job. A dispatched
// child is no longer considered parameterized.
func (j *Job) IsParameterized() bool {
	return j.ParameterizedJob != nil && !j.Dispatched
}

// IsMultiregion returns whether a job is multiregion
func (j *Job) IsMultiregion() bool {
	return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0
}

// VaultPolicies returns the set of Vault policies per task group, per task.
// Groups with no Vault-using tasks are omitted from the result.
func (j *Job) VaultPolicies() map[string]map[string]*Vault {
	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))

	for _, tg := range j.TaskGroups {
		tgPolicies := make(map[string]*Vault, len(tg.Tasks))

		for _, task := range tg.Tasks {
			if task.Vault == nil {
				continue
			}

			tgPolicies[task.Name] = task.Vault
		}

		if len(tgPolicies) != 0 {
			policies[tg.Name] = tgPolicies
		}
	}

	return policies
}

// ConnectTasks returns the set of Consul Connect enabled tasks defined on the
// job that will require a Service Identity token in the case that Consul ACLs
// are enabled. The TaskKind.Value is the name of the Consul service.
//
// This method is meaningful only after the Job has passed through the job
// submission Mutator functions.
func (j *Job) ConnectTasks() []TaskKind {
	var kinds []TaskKind
	for _, tg := range j.TaskGroups {
		for _, task := range tg.Tasks {
			if task.Kind.IsConnectProxy() ||
				task.Kind.IsConnectNative() ||
				task.Kind.IsAnyConnectGateway() {
				kinds = append(kinds, task.Kind)
			}
		}
	}
	return kinds
}

// RequiredSignals returns a mapping of task groups to tasks to their required
// set of signals. Signals come from Vault change_mode, KillSignal, and
// template change_mode settings; each task's list is deduplicated and sorted.
func (j *Job) RequiredSignals() map[string]map[string][]string {
	signals := make(map[string]map[string][]string)

	for _, tg := range j.TaskGroups {
		for _, task := range tg.Tasks {
			// Use this local one as a set
			taskSignals := make(map[string]struct{})

			// Check if the Vault change mode uses signals
			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
				taskSignals[task.Vault.ChangeSignal] = struct{}{}
			}

			// If a user has specified a KillSignal, add it to required signals
			if task.KillSignal != "" {
				taskSignals[task.KillSignal] = struct{}{}
			}

			// Check if any template change mode uses signals
			for _, t := range task.Templates {
				if t.ChangeMode != TemplateChangeModeSignal {
					continue
				}

				taskSignals[t.ChangeSignal] = struct{}{}
			}

			// Flatten and sort the signals
			l := len(taskSignals)
			if l == 0 {
				continue
			}

			flat := make([]string, 0, l)
			for sig := range taskSignals {
				flat = append(flat, sig)
			}

			sort.Strings(flat)
			tgSignals, ok := signals[tg.Name]
			if !ok {
				tgSignals = make(map[string][]string)
				signals[tg.Name] = tgSignals
			}
			tgSignals[task.Name] = flat
		}

	}

	return signals
}

// SpecChanged determines if the functional specification has changed between
// two job versions.
4525func (j *Job) SpecChanged(new *Job) bool { 4526 if j == nil { 4527 return new != nil 4528 } 4529 4530 // Create a copy of the new job 4531 c := new.Copy() 4532 4533 // Update the new job so we can do a reflect 4534 c.Status = j.Status 4535 c.StatusDescription = j.StatusDescription 4536 c.Stable = j.Stable 4537 c.Version = j.Version 4538 c.CreateIndex = j.CreateIndex 4539 c.ModifyIndex = j.ModifyIndex 4540 c.JobModifyIndex = j.JobModifyIndex 4541 c.SubmitTime = j.SubmitTime 4542 4543 // cgbaker: FINISH: probably need some consideration of scaling policy ID here 4544 4545 // Deep equals the jobs 4546 return !reflect.DeepEqual(j, c) 4547} 4548 4549func (j *Job) SetSubmitTime() { 4550 j.SubmitTime = time.Now().UTC().UnixNano() 4551} 4552 4553// JobListStub is used to return a subset of job information 4554// for the job list 4555type JobListStub struct { 4556 ID string 4557 ParentID string 4558 Name string 4559 Namespace string `json:",omitempty"` 4560 Datacenters []string 4561 Multiregion *Multiregion 4562 Type string 4563 Priority int 4564 Periodic bool 4565 ParameterizedJob bool 4566 Stop bool 4567 Status string 4568 StatusDescription string 4569 JobSummary *JobSummary 4570 CreateIndex uint64 4571 ModifyIndex uint64 4572 JobModifyIndex uint64 4573 SubmitTime int64 4574} 4575 4576// JobSummary summarizes the state of the allocations of a job 4577type JobSummary struct { 4578 // JobID is the ID of the job the summary is for 4579 JobID string 4580 4581 // Namespace is the namespace of the job and its summary 4582 Namespace string 4583 4584 // Summary contains the summary per task group for the Job 4585 Summary map[string]TaskGroupSummary 4586 4587 // Children contains a summary for the children of this job. 
4588 Children *JobChildrenSummary 4589 4590 // Raft Indexes 4591 CreateIndex uint64 4592 ModifyIndex uint64 4593} 4594 4595// Copy returns a new copy of JobSummary 4596func (js *JobSummary) Copy() *JobSummary { 4597 newJobSummary := new(JobSummary) 4598 *newJobSummary = *js 4599 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 4600 for k, v := range js.Summary { 4601 newTGSummary[k] = v 4602 } 4603 newJobSummary.Summary = newTGSummary 4604 newJobSummary.Children = newJobSummary.Children.Copy() 4605 return newJobSummary 4606} 4607 4608// JobChildrenSummary contains the summary of children job statuses 4609type JobChildrenSummary struct { 4610 Pending int64 4611 Running int64 4612 Dead int64 4613} 4614 4615// Copy returns a new copy of a JobChildrenSummary 4616func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 4617 if jc == nil { 4618 return nil 4619 } 4620 4621 njc := new(JobChildrenSummary) 4622 *njc = *jc 4623 return njc 4624} 4625 4626// TaskGroup summarizes the state of all the allocations of a particular 4627// TaskGroup 4628type TaskGroupSummary struct { 4629 Queued int 4630 Complete int 4631 Failed int 4632 Running int 4633 Starting int 4634 Lost int 4635} 4636 4637const ( 4638 // Checks uses any registered health check state in combination with task 4639 // states to determine if a allocation is healthy. 4640 UpdateStrategyHealthCheck_Checks = "checks" 4641 4642 // TaskStates uses the task states of an allocation to determine if the 4643 // allocation is healthy. 4644 UpdateStrategyHealthCheck_TaskStates = "task_states" 4645 4646 // Manual allows the operator to manually signal to Nomad when an 4647 // allocations is healthy. This allows more advanced health checking that is 4648 // outside of the scope of Nomad. 4649 UpdateStrategyHealthCheck_Manual = "manual" 4650) 4651 4652var ( 4653 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 4654 // jobs with the old policy or for populating field defaults. 
4655 DefaultUpdateStrategy = &UpdateStrategy{ 4656 Stagger: 30 * time.Second, 4657 MaxParallel: 1, 4658 HealthCheck: UpdateStrategyHealthCheck_Checks, 4659 MinHealthyTime: 10 * time.Second, 4660 HealthyDeadline: 5 * time.Minute, 4661 ProgressDeadline: 10 * time.Minute, 4662 AutoRevert: false, 4663 AutoPromote: false, 4664 Canary: 0, 4665 } 4666) 4667 4668// UpdateStrategy is used to modify how updates are done 4669type UpdateStrategy struct { 4670 // Stagger is used to determine the rate at which allocations are migrated 4671 // due to down or draining nodes. 4672 Stagger time.Duration 4673 4674 // MaxParallel is how many updates can be done in parallel 4675 MaxParallel int 4676 4677 // HealthCheck specifies the mechanism in which allocations are marked 4678 // healthy or unhealthy as part of a deployment. 4679 HealthCheck string 4680 4681 // MinHealthyTime is the minimum time an allocation must be in the healthy 4682 // state before it is marked as healthy, unblocking more allocations to be 4683 // rolled. 4684 MinHealthyTime time.Duration 4685 4686 // HealthyDeadline is the time in which an allocation must be marked as 4687 // healthy before it is automatically transitioned to unhealthy. This time 4688 // period doesn't count against the MinHealthyTime. 4689 HealthyDeadline time.Duration 4690 4691 // ProgressDeadline is the time in which an allocation as part of the 4692 // deployment must transition to healthy. If no allocation becomes healthy 4693 // after the deadline, the deployment is marked as failed. If the deadline 4694 // is zero, the first failure causes the deployment to fail. 4695 ProgressDeadline time.Duration 4696 4697 // AutoRevert declares that if a deployment fails because of unhealthy 4698 // allocations, there should be an attempt to auto-revert the job to a 4699 // stable version. 
4700 AutoRevert bool 4701 4702 // AutoPromote declares that the deployment should be promoted when all canaries are 4703 // healthy 4704 AutoPromote bool 4705 4706 // Canary is the number of canaries to deploy when a change to the task 4707 // group is detected. 4708 Canary int 4709} 4710 4711func (u *UpdateStrategy) Copy() *UpdateStrategy { 4712 if u == nil { 4713 return nil 4714 } 4715 4716 copy := new(UpdateStrategy) 4717 *copy = *u 4718 return copy 4719} 4720 4721func (u *UpdateStrategy) Validate() error { 4722 if u == nil { 4723 return nil 4724 } 4725 4726 var mErr multierror.Error 4727 switch u.HealthCheck { 4728 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 4729 default: 4730 _ = multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 4731 } 4732 4733 if u.MaxParallel < 0 { 4734 _ = multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel)) 4735 } 4736 if u.Canary < 0 { 4737 _ = multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 4738 } 4739 if u.Canary == 0 && u.AutoPromote { 4740 _ = multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero")) 4741 } 4742 if u.MinHealthyTime < 0 { 4743 _ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 4744 } 4745 if u.HealthyDeadline <= 0 { 4746 _ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 4747 } 4748 if u.ProgressDeadline < 0 { 4749 _ = multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline)) 4750 } 4751 if u.MinHealthyTime >= u.HealthyDeadline { 4752 _ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 4753 } 4754 if u.ProgressDeadline != 0 && 
u.HealthyDeadline >= u.ProgressDeadline { 4755 _ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline)) 4756 } 4757 if u.Stagger <= 0 { 4758 _ = multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 4759 } 4760 4761 return mErr.ErrorOrNil() 4762} 4763 4764func (u *UpdateStrategy) IsEmpty() bool { 4765 if u == nil { 4766 return true 4767 } 4768 4769 return u.MaxParallel == 0 4770} 4771 4772// TODO(alexdadgar): Remove once no longer used by the scheduler. 4773// Rolling returns if a rolling strategy should be used 4774func (u *UpdateStrategy) Rolling() bool { 4775 return u.Stagger > 0 && u.MaxParallel > 0 4776} 4777 4778type Multiregion struct { 4779 Strategy *MultiregionStrategy 4780 Regions []*MultiregionRegion 4781} 4782 4783func (m *Multiregion) Canonicalize() { 4784 if m.Strategy == nil { 4785 m.Strategy = &MultiregionStrategy{} 4786 } 4787 if m.Regions == nil { 4788 m.Regions = []*MultiregionRegion{} 4789 } 4790} 4791 4792// Diff indicates whether the multiregion config has changed 4793func (m *Multiregion) Diff(m2 *Multiregion) bool { 4794 return !reflect.DeepEqual(m, m2) 4795} 4796 4797func (m *Multiregion) Copy() *Multiregion { 4798 if m == nil { 4799 return nil 4800 } 4801 copy := new(Multiregion) 4802 if m.Strategy != nil { 4803 copy.Strategy = &MultiregionStrategy{ 4804 MaxParallel: m.Strategy.MaxParallel, 4805 OnFailure: m.Strategy.OnFailure, 4806 } 4807 } 4808 for _, region := range m.Regions { 4809 copyRegion := &MultiregionRegion{ 4810 Name: region.Name, 4811 Count: region.Count, 4812 Datacenters: []string{}, 4813 Meta: map[string]string{}, 4814 } 4815 copyRegion.Datacenters = append(copyRegion.Datacenters, region.Datacenters...) 
4816 for k, v := range region.Meta { 4817 copyRegion.Meta[k] = v 4818 } 4819 copy.Regions = append(copy.Regions, copyRegion) 4820 } 4821 return copy 4822} 4823 4824type MultiregionStrategy struct { 4825 MaxParallel int 4826 OnFailure string 4827} 4828 4829type MultiregionRegion struct { 4830 Name string 4831 Count int 4832 Datacenters []string 4833 Meta map[string]string 4834} 4835 4836// Namespace allows logically grouping jobs and their associated objects. 4837type Namespace struct { 4838 // Name is the name of the namespace 4839 Name string 4840 4841 // Description is a human readable description of the namespace 4842 Description string 4843 4844 // Quota is the quota specification that the namespace should account 4845 // against. 4846 Quota string 4847 4848 // Hash is the hash of the namespace which is used to efficiently replicate 4849 // cross-regions. 4850 Hash []byte 4851 4852 // Raft Indexes 4853 CreateIndex uint64 4854 ModifyIndex uint64 4855} 4856 4857func (n *Namespace) Validate() error { 4858 var mErr multierror.Error 4859 4860 // Validate the name and description 4861 if !validNamespaceName.MatchString(n.Name) { 4862 err := fmt.Errorf("invalid name %q. 
Must match regex %s", n.Name, validNamespaceName) 4863 mErr.Errors = append(mErr.Errors, err) 4864 } 4865 if len(n.Description) > maxNamespaceDescriptionLength { 4866 err := fmt.Errorf("description longer than %d", maxNamespaceDescriptionLength) 4867 mErr.Errors = append(mErr.Errors, err) 4868 } 4869 4870 return mErr.ErrorOrNil() 4871} 4872 4873// SetHash is used to compute and set the hash of the namespace 4874func (n *Namespace) SetHash() []byte { 4875 // Initialize a 256bit Blake2 hash (32 bytes) 4876 hash, err := blake2b.New256(nil) 4877 if err != nil { 4878 panic(err) 4879 } 4880 4881 // Write all the user set fields 4882 _, _ = hash.Write([]byte(n.Name)) 4883 _, _ = hash.Write([]byte(n.Description)) 4884 _, _ = hash.Write([]byte(n.Quota)) 4885 4886 // Finalize the hash 4887 hashVal := hash.Sum(nil) 4888 4889 // Set and return the hash 4890 n.Hash = hashVal 4891 return hashVal 4892} 4893 4894func (n *Namespace) Copy() *Namespace { 4895 nc := new(Namespace) 4896 *nc = *n 4897 nc.Hash = make([]byte, len(n.Hash)) 4898 copy(nc.Hash, n.Hash) 4899 return nc 4900} 4901 4902// NamespaceListRequest is used to request a list of namespaces 4903type NamespaceListRequest struct { 4904 QueryOptions 4905} 4906 4907// NamespaceListResponse is used for a list request 4908type NamespaceListResponse struct { 4909 Namespaces []*Namespace 4910 QueryMeta 4911} 4912 4913// NamespaceSpecificRequest is used to query a specific namespace 4914type NamespaceSpecificRequest struct { 4915 Name string 4916 QueryOptions 4917} 4918 4919// SingleNamespaceResponse is used to return a single namespace 4920type SingleNamespaceResponse struct { 4921 Namespace *Namespace 4922 QueryMeta 4923} 4924 4925// NamespaceSetRequest is used to query a set of namespaces 4926type NamespaceSetRequest struct { 4927 Namespaces []string 4928 QueryOptions 4929} 4930 4931// NamespaceSetResponse is used to return a set of namespaces 4932type NamespaceSetResponse struct { 4933 Namespaces map[string]*Namespace // Keyed 
by namespace Name 4934 QueryMeta 4935} 4936 4937// NamespaceDeleteRequest is used to delete a set of namespaces 4938type NamespaceDeleteRequest struct { 4939 Namespaces []string 4940 WriteRequest 4941} 4942 4943// NamespaceUpsertRequest is used to upsert a set of namespaces 4944type NamespaceUpsertRequest struct { 4945 Namespaces []*Namespace 4946 WriteRequest 4947} 4948 4949const ( 4950 // PeriodicSpecCron is used for a cron spec. 4951 PeriodicSpecCron = "cron" 4952 4953 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 4954 // separated list of unix timestamps at which to launch. 4955 PeriodicSpecTest = "_internal_test" 4956) 4957 4958// Periodic defines the interval a job should be run at. 4959type PeriodicConfig struct { 4960 // Enabled determines if the job should be run periodically. 4961 Enabled bool 4962 4963 // Spec specifies the interval the job should be run as. It is parsed based 4964 // on the SpecType. 4965 Spec string 4966 4967 // SpecType defines the format of the spec. 4968 SpecType string 4969 4970 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 4971 ProhibitOverlap bool 4972 4973 // TimeZone is the user specified string that determines the time zone to 4974 // launch against. The time zones must be specified from IANA Time Zone 4975 // database, such as "America/New_York". 
4976 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 4977 // Reference: https://www.iana.org/time-zones 4978 TimeZone string 4979 4980 // location is the time zone to evaluate the launch time against 4981 location *time.Location 4982} 4983 4984func (p *PeriodicConfig) Copy() *PeriodicConfig { 4985 if p == nil { 4986 return nil 4987 } 4988 np := new(PeriodicConfig) 4989 *np = *p 4990 return np 4991} 4992 4993func (p *PeriodicConfig) Validate() error { 4994 if !p.Enabled { 4995 return nil 4996 } 4997 4998 var mErr multierror.Error 4999 if p.Spec == "" { 5000 _ = multierror.Append(&mErr, fmt.Errorf("Must specify a spec")) 5001 } 5002 5003 // Check if we got a valid time zone 5004 if p.TimeZone != "" { 5005 if _, err := time.LoadLocation(p.TimeZone); err != nil { 5006 _ = multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err)) 5007 } 5008 } 5009 5010 switch p.SpecType { 5011 case PeriodicSpecCron: 5012 // Validate the cron spec 5013 if _, err := cronexpr.Parse(p.Spec); err != nil { 5014 _ = multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)) 5015 } 5016 case PeriodicSpecTest: 5017 // No-op 5018 default: 5019 _ = multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType)) 5020 } 5021 5022 return mErr.ErrorOrNil() 5023} 5024 5025func (p *PeriodicConfig) Canonicalize() { 5026 // Load the location 5027 l, err := time.LoadLocation(p.TimeZone) 5028 if err != nil { 5029 p.location = time.UTC 5030 } 5031 5032 p.location = l 5033} 5034 5035// CronParseNext is a helper that parses the next time for the given expression 5036// but captures any panic that may occur in the underlying library. 
func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) {
	// cronexpr can panic on pathological inputs; convert that into an error
	// so callers do not crash.
	defer func() {
		if recover() != nil {
			t = time.Time{}
			err = fmt.Errorf("failed parsing cron expression: %q", spec)
		}
	}()

	return e.Next(fromTime), nil
}

// Next returns the closest time instant matching the spec that is after the
// passed time. If no matching instance exists, the zero value of time.Time is
// returned. The `time.Location` of the returned value matches that of the
// passed time.
func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) {
	switch p.SpecType {
	case PeriodicSpecCron:
		e, err := cronexpr.Parse(p.Spec)
		if err != nil {
			return time.Time{}, fmt.Errorf("failed parsing cron expression: %q: %v", p.Spec, err)
		}
		return CronParseNext(e, fromTime, p.Spec)
	case PeriodicSpecTest:
		split := strings.Split(p.Spec, ",")
		if len(split) == 1 && split[0] == "" {
			return time.Time{}, nil
		}

		// Parse the times
		times := make([]time.Time, len(split))
		for i, s := range split {
			unix, err := strconv.Atoi(s)
			if err != nil {
				// NOTE(review): a malformed timestamp is silently treated as
				// "no next launch" rather than surfaced as an error — confirm
				// this is intentional for the test-only spec type.
				return time.Time{}, nil
			}

			times[i] = time.Unix(int64(unix), 0)
		}

		// Find the next match
		for _, next := range times {
			if fromTime.Before(next) {
				return next, nil
			}
		}
	}

	return time.Time{}, nil
}

// GetLocation returns the location to use for determining the time zone to run
// the periodic job against. Defaults to UTC when no location was resolved.
func (p *PeriodicConfig) GetLocation() *time.Location {
	// Jobs pre 0.5.5 will not have this
	if p.location != nil {
		return p.location
	}

	return time.UTC
}

const (
	// PeriodicLaunchSuffix is the string appended to the periodic jobs ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)

// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID        string    // ID of the periodic job.
	Namespace string    // Namespace of the periodic job
	Launch    time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

const (
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)

// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configure the payload requirements
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher
	MetaOptional []string
}

// Validate checks that the payload mode is known and that the required and
// optional meta key sets do not overlap.
func (d *ParameterizedJobConfig) Validate() error {
	var mErr multierror.Error
	switch d.Payload {
	case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
	}

	// Check that the meta configurations are disjoint sets
	disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
	if !disjoint {
		_ = multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
	}

	return mErr.ErrorOrNil()
}

// Canonicalize defaults the payload mode to optional when unset.
func (d *ParameterizedJobConfig) Canonicalize() {
	if d.Payload == "" {
		d.Payload = DispatchPayloadOptional
	}
}

// Copy returns a copy of the config with its own meta key slices, or nil for
// a nil receiver.
func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
	if d == nil {
		return nil
	}
	nd := new(ParameterizedJobConfig)
	*nd = *d
	nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
	nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
	return nd
}

// DispatchedID returns an ID appropriate for a job dispatched against a
// particular parameterized job
func DispatchedID(templateID string, t time.Time) string {
	// Short random suffix avoids collisions for dispatches in the same second.
	u := uuid.Generate()[:8]
	return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
}

// DispatchPayloadConfig configures how a task gets its input from a job dispatch
type DispatchPayloadConfig struct {
	// File specifies a relative path to where the input data should be written
	File string
}

// Copy returns a shallow copy of the config, or nil for a nil receiver.
func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
	if d == nil {
		return nil
	}
	nd := new(DispatchPayloadConfig)
	*nd = *d
	return nd
}

// Validate ensures the destination file stays within the allocation's
// task/local directory.
func (d *DispatchPayloadConfig) Validate() error {
	// Verify the destination doesn't escape
	escaped, err := PathEscapesAllocDir("task/local/", d.File)
	if err != nil {
		return fmt.Errorf("invalid destination path: %v", err)
	} else if escaped {
		return fmt.Errorf("destination escapes allocation directory")
	}

	return nil
}

const (
	TaskLifecycleHookPrestart  = "prestart"
	TaskLifecycleHookPoststart = "poststart"
	TaskLifecycleHookPoststop  = "poststop"
)

// TaskLifecycleConfig describes when a task runs within an allocation's
// lifecycle and whether it is a long-lived sidecar.
type TaskLifecycleConfig struct {
	Hook    string
	Sidecar bool
}

// Copy returns a shallow copy of the lifecycle config, or nil for a nil
// receiver.
func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig {
	if d == nil {
		return nil
	}
	nd := new(TaskLifecycleConfig)
	*nd = *d
	return nd
}

// Validate checks that a known, non-empty lifecycle hook was provided. A nil
// config is valid.
func (d *TaskLifecycleConfig) Validate() error {
	if d == nil {
		return nil
	}

	switch d.Hook {
	case TaskLifecycleHookPrestart:
	case TaskLifecycleHookPoststart:
	case TaskLifecycleHookPoststop:
	case "":
		return fmt.Errorf("no lifecycle hook provided")
	default:
		return fmt.Errorf("invalid hook: %v", d.Hook)
	}

	return nil
}

var (
	// These default restart policies need to be in sync with
	// Canonicalize in api/tasks.go

	DefaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 30 * time.Minute,
		Mode:     RestartPolicyModeFail,
	}
	DefaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 3,
		Interval: 24 * time.Hour,
		Mode:     RestartPolicyModeFail,
	}
)

var (
	// These default reschedule policies need to be in sync with
	// NewDefaultReschedulePolicy in api/tasks.go

	DefaultServiceJobReschedulePolicy = ReschedulePolicy{
		Delay:         30 * time.Second,
		DelayFunction: "exponential",
		MaxDelay:      1 * time.Hour,
		Unlimited:     true,
	}
	DefaultBatchJobReschedulePolicy = ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "constant",
	}
)

const (
	// RestartPolicyModeDelay causes an artificial delay till the next interval is
	// reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second

	// ReasonWithinPolicy describes restart events that are within policy
	ReasonWithinPolicy = "Restart within policy"
)

// JobScalingEvents contains the scaling events for a given job
type JobScalingEvents struct {
	Namespace string
	JobID     string

	// This map is indexed by target; currently, this is just task group
	// the indexed array is sorted from newest to oldest event
	// the array should have less than JobTrackedScalingEvents entries
	ScalingEvents map[string][]*ScalingEvent

	// Raft index
	ModifyIndex uint64
}

// NewScalingEvent is a factory method for ScalingEvent objects with the
// current time and the given message.
func NewScalingEvent(message string) *ScalingEvent {
	return &ScalingEvent{
		Time:    time.Now().Unix(),
		Message: message,
	}
}

// ScalingEvent describes a scaling event against a Job
type ScalingEvent struct {
	// Time is the Unix timestamp for the scaling event.
	// NOTE(review): this comment previously claimed nanoseconds, but
	// NewScalingEvent populates it with time.Now().Unix() (seconds) —
	// confirm which unit consumers expect.
	Time int64

	// Count is the new scaling count, if provided
	Count *int64

	// PreviousCount is the count at the time of the scaling event
	PreviousCount int64

	// Message is the message describing a scaling event
	Message string

	// Error indicates an error state for this scaling event
	Error bool

	// Meta is a map of metadata returned during a scaling event
	Meta map[string]interface{}

	// EvalID is the ID for an evaluation if one was created as part of a scaling event
	EvalID *string

	// Raft index
	CreateIndex uint64
}

// SetError marks the event's error state and returns the event for chaining.
func (e *ScalingEvent) SetError(error bool) *ScalingEvent {
	e.Error = error
	return e
}

// SetMeta attaches metadata to the event and returns the event for chaining.
func (e *ScalingEvent) SetMeta(meta map[string]interface{}) *ScalingEvent {
	e.Meta = meta
	return e
}

// SetEvalID records the triggering evaluation and returns the event for
// chaining.
func (e *ScalingEvent) SetEvalID(evalID string) *ScalingEvent {
	e.EvalID = &evalID
	return e
}

// ScalingEventRequest is used by the Job.Scale endpoint
// to register scaling events
type ScalingEventRequest struct {
	Namespace string
	JobID     string
	TaskGroup string

	ScalingEvent *ScalingEvent
}

// ScalingPolicy specifies the scaling policy for a scaling target
type ScalingPolicy struct {
	// ID is a generated UUID used for looking up the scaling policy
	ID string

	// Type is the type of scaling performed by the policy
	Type string

	// Target contains information about the target of the scaling policy, like job and group
	Target map[string]string

	// Policy is an opaque description of the scaling policy, passed to the autoscaler
	Policy map[string]interface{}

	// Min is the minimum allowable scaling count for this target
	Min int64

	// Max is the maximum allowable scaling count for this target
	Max int64

	// Enabled indicates whether this policy has been enabled/disabled
	Enabled bool

	CreateIndex uint64
	ModifyIndex uint64
}

// JobKey returns a key that is unique to a job-scoped target, useful as a map
// key. This uses the policy type, plus target (group and task).
5400func (p *ScalingPolicy) JobKey() string { 5401 return p.Type + "\000" + 5402 p.Target[ScalingTargetGroup] + "\000" + 5403 p.Target[ScalingTargetTask] 5404} 5405 5406const ( 5407 ScalingTargetNamespace = "Namespace" 5408 ScalingTargetJob = "Job" 5409 ScalingTargetGroup = "Group" 5410 ScalingTargetTask = "Task" 5411 5412 ScalingPolicyTypeHorizontal = "horizontal" 5413) 5414 5415func (p *ScalingPolicy) Canonicalize() { 5416 if p.Type == "" { 5417 p.Type = ScalingPolicyTypeHorizontal 5418 } 5419} 5420 5421func (p *ScalingPolicy) Copy() *ScalingPolicy { 5422 if p == nil { 5423 return nil 5424 } 5425 5426 opaquePolicyConfig, err := copystructure.Copy(p.Policy) 5427 if err != nil { 5428 panic(err.Error()) 5429 } 5430 5431 c := ScalingPolicy{ 5432 ID: p.ID, 5433 Policy: opaquePolicyConfig.(map[string]interface{}), 5434 Enabled: p.Enabled, 5435 Type: p.Type, 5436 Min: p.Min, 5437 Max: p.Max, 5438 CreateIndex: p.CreateIndex, 5439 ModifyIndex: p.ModifyIndex, 5440 } 5441 c.Target = make(map[string]string, len(p.Target)) 5442 for k, v := range p.Target { 5443 c.Target[k] = v 5444 } 5445 return &c 5446} 5447 5448func (p *ScalingPolicy) Validate() error { 5449 if p == nil { 5450 return nil 5451 } 5452 5453 var mErr multierror.Error 5454 5455 // Check policy type and target 5456 if p.Type == "" { 5457 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing scaling policy type")) 5458 } else { 5459 mErr.Errors = append(mErr.Errors, p.validateType().Errors...) 
5460 } 5461 5462 // Check Min and Max 5463 if p.Max < 0 { 5464 mErr.Errors = append(mErr.Errors, 5465 fmt.Errorf("maximum count must be specified and non-negative")) 5466 } else if p.Max < p.Min { 5467 mErr.Errors = append(mErr.Errors, 5468 fmt.Errorf("maximum count must not be less than minimum count")) 5469 } 5470 5471 if p.Min < 0 { 5472 mErr.Errors = append(mErr.Errors, 5473 fmt.Errorf("minimum count must be specified and non-negative")) 5474 } 5475 5476 return mErr.ErrorOrNil() 5477} 5478 5479func (p *ScalingPolicy) validateTargetHorizontal() (mErr multierror.Error) { 5480 if len(p.Target) == 0 { 5481 // This is probably not a Nomad horizontal policy 5482 return 5483 } 5484 5485 // Nomad horizontal policies should have Namespace, Job and TaskGroup 5486 if p.Target[ScalingTargetNamespace] == "" { 5487 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target namespace")) 5488 } 5489 if p.Target[ScalingTargetJob] == "" { 5490 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target job")) 5491 } 5492 if p.Target[ScalingTargetGroup] == "" { 5493 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target group")) 5494 } 5495 return 5496} 5497 5498// Diff indicates whether the specification for a given scaling policy has changed 5499func (p *ScalingPolicy) Diff(p2 *ScalingPolicy) bool { 5500 copy := *p2 5501 copy.ID = p.ID 5502 copy.CreateIndex = p.CreateIndex 5503 copy.ModifyIndex = p.ModifyIndex 5504 return !reflect.DeepEqual(*p, copy) 5505} 5506 5507// TarketTaskGroup updates a ScalingPolicy target to specify a given task group 5508func (p *ScalingPolicy) TargetTaskGroup(job *Job, tg *TaskGroup) *ScalingPolicy { 5509 p.Target = map[string]string{ 5510 ScalingTargetNamespace: job.Namespace, 5511 ScalingTargetJob: job.ID, 5512 ScalingTargetGroup: tg.Name, 5513 } 5514 return p 5515} 5516 5517// TargetTask updates a ScalingPolicy target to specify a given task 5518func (p *ScalingPolicy) TargetTask(job *Job, tg *TaskGroup, task *Task) *ScalingPolicy { 5519 
	// Start from the group-level target, then narrow it to the task.
	p.TargetTaskGroup(job, tg)
	p.Target[ScalingTargetTask] = task.Name
	return p
}

// Stub returns a ScalingPolicyListStub for this policy. The Target map is
// copied so the stub does not alias the policy's own map.
func (p *ScalingPolicy) Stub() *ScalingPolicyListStub {
	stub := &ScalingPolicyListStub{
		ID:          p.ID,
		Type:        p.Type,
		Target:      make(map[string]string),
		Enabled:     p.Enabled,
		CreateIndex: p.CreateIndex,
		ModifyIndex: p.ModifyIndex,
	}
	for k, v := range p.Target {
		stub.Target[k] = v
	}
	return stub
}

// GetScalingPolicies returns a slice of all scaling policies for this job
func (j *Job) GetScalingPolicies() []*ScalingPolicy {
	ret := make([]*ScalingPolicy, 0)

	for _, tg := range j.TaskGroups {
		if tg.Scaling != nil {
			ret = append(ret, tg.Scaling)
		}
	}

	// GetEntScalingPolicies contributes additional policies; it is defined
	// elsewhere (presumably enterprise-specific — see its declaration).
	ret = append(ret, j.GetEntScalingPolicies()...)

	return ret
}

// ScalingPolicyListStub is used to return a subset of scaling policy information
// for the scaling policy list
type ScalingPolicyListStub struct {
	ID          string
	Enabled     bool
	Type        string
	Target      map[string]string
	CreateIndex uint64
	ModifyIndex uint64
}

// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than attempt times
	// in an interval.
	Mode string
}

// Copy returns a copy of the policy, or nil for a nil receiver. All fields
// are value types, so the shallow copy is a complete copy.
func (r *RestartPolicy) Copy() *RestartPolicy {
	if r == nil {
		return nil
	}
	nrp := new(RestartPolicy)
	*nrp = *r
	return nrp
}

// Validate checks the restart policy for an unsupported mode, ambiguous
// attempt settings, a too-small interval, and an interval too short to fit
// the requested attempts given the delay.
func (r *RestartPolicy) Validate() error {
	var mErr multierror.Error
	switch r.Mode {
	case RestartPolicyModeDelay, RestartPolicyModeFail:
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
	}

	// Check for ambiguous/confusing settings
	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
		_ = multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
	}

	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
		_ = multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
	}
	// The interval must be long enough to fit all attempts with their delay.
	if time.Duration(r.Attempts)*r.Delay > r.Interval {
		_ = multierror.Append(&mErr,
			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
	}
	return mErr.ErrorOrNil()
}

// NewRestartPolicy returns a copy of the default restart policy for the given
// job type, or nil for job types without a default.
func NewRestartPolicy(jobType string) *RestartPolicy {
	switch jobType {
	case JobTypeService, JobTypeSystem:
		rp := DefaultServiceJobRestartPolicy
		return &rp
	case JobTypeBatch:
		rp := DefaultBatchJobRestartPolicy
		return &rp
	}
	return nil
}

// ReschedulePolicyMinInterval is the smallest allowed reschedule Interval.
const ReschedulePolicyMinInterval = 15 * time.Second

// ReschedulePolicyMinDelay is the smallest allowed reschedule Delay.
const ReschedulePolicyMinDelay = 5 * time.Second

// RescheduleDelayFunctions enumerates the supported DelayFunction values.
var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"}

// ReschedulePolicy configures how Tasks are rescheduled when they crash or fail.
type ReschedulePolicy struct {
	// Attempts limits the number of rescheduling attempts that can occur in an interval.
5634 Attempts int 5635 5636 // Interval is a duration in which we can limit the number of reschedule attempts. 5637 Interval time.Duration 5638 5639 // Delay is a minimum duration to wait between reschedule attempts. 5640 // The delay function determines how much subsequent reschedule attempts are delayed by. 5641 Delay time.Duration 5642 5643 // DelayFunction determines how the delay progressively changes on subsequent reschedule 5644 // attempts. Valid values are "exponential", "constant", and "fibonacci". 5645 DelayFunction string 5646 5647 // MaxDelay is an upper bound on the delay. 5648 MaxDelay time.Duration 5649 5650 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 5651 // between reschedule attempts. 5652 Unlimited bool 5653} 5654 5655func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 5656 if r == nil { 5657 return nil 5658 } 5659 nrp := new(ReschedulePolicy) 5660 *nrp = *r 5661 return nrp 5662} 5663 5664func (r *ReschedulePolicy) Enabled() bool { 5665 enabled := r != nil && (r.Attempts > 0 || r.Unlimited) 5666 return enabled 5667} 5668 5669// Validate uses different criteria to validate the reschedule policy 5670// Delay must be a minimum of 5 seconds 5671// Delay Ceiling is ignored if Delay Function is "constant" 5672// Number of possible attempts is validated, given the interval, delay and delay function 5673func (r *ReschedulePolicy) Validate() error { 5674 if !r.Enabled() { 5675 return nil 5676 } 5677 var mErr multierror.Error 5678 // Check for ambiguous/confusing settings 5679 if r.Attempts > 0 { 5680 if r.Interval <= 0 { 5681 _ = multierror.Append(&mErr, fmt.Errorf("Interval must be a non zero value if Attempts > 0")) 5682 } 5683 if r.Unlimited { 5684 _ = multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+ 5685 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited)) 5686 _ = multierror.Append(&mErr, errors.New("If Attempts >0, Unlimited cannot also be set 
to true")) 5687 } 5688 } 5689 5690 delayPreCheck := true 5691 // Delay should be bigger than the default 5692 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 5693 _ = multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 5694 delayPreCheck = false 5695 } 5696 5697 // Must use a valid delay function 5698 if !isValidDelayFunction(r.DelayFunction) { 5699 _ = multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) 5700 delayPreCheck = false 5701 } 5702 5703 // Validate MaxDelay if not using linear delay progression 5704 if r.DelayFunction != "constant" { 5705 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 5706 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 5707 delayPreCheck = false 5708 } 5709 if r.MaxDelay < r.Delay { 5710 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) 5711 delayPreCheck = false 5712 } 5713 5714 } 5715 5716 // Validate Interval and other delay parameters if attempts are limited 5717 if !r.Unlimited { 5718 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 5719 _ = multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 5720 } 5721 if !delayPreCheck { 5722 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early 5723 return mErr.ErrorOrNil() 5724 } 5725 crossValidationErr := r.validateDelayParams() 5726 if crossValidationErr != nil { 5727 _ = multierror.Append(&mErr, crossValidationErr) 5728 } 5729 } 5730 return mErr.ErrorOrNil() 5731} 5732 5733func isValidDelayFunction(delayFunc string) bool { 5734 for _, value := range RescheduleDelayFunctions { 5735 if value == delayFunc { 5736 return true 5737 } 5738 } 5739 
	return false
}

// validateDelayParams cross-validates Attempts, Interval, Delay,
// DelayFunction, and MaxDelay: when the requested attempts do not fit in the
// interval, it returns an error explaining how many attempts do fit and what
// interval would accommodate all of them.
func (r *ReschedulePolicy) validateDelayParams() error {
	ok, possibleAttempts, recommendedInterval := r.viableAttempts()
	if ok {
		return nil
	}
	var mErr multierror.Error
	if r.DelayFunction == "constant" {
		_ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+
			"delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction))
	} else {
		_ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+
			"delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay))
	}
	_ = multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts))
	return mErr.ErrorOrNil()
}

// viableAttempts simulates the configured delay progression and returns
// whether Attempts reschedules fit within Interval, how many attempts
// actually fit, and the interval that would be needed to fit them all.
func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) {
	var possibleAttempts int
	var recommendedInterval time.Duration
	valid := true
	switch r.DelayFunction {
	case "constant":
		recommendedInterval = time.Duration(r.Attempts) * r.Delay
		if r.Interval < recommendedInterval {
			possibleAttempts = int(r.Interval / r.Delay)
			valid = false
		}
	case "exponential":
		for i := 0; i < r.Attempts; i++ {
			// Delay doubles per attempt, capped at MaxDelay.
			nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay
			if nextDelay > r.MaxDelay {
				nextDelay = r.MaxDelay
				recommendedInterval += nextDelay
			} else {
				recommendedInterval = nextDelay
			}
			if recommendedInterval < r.Interval {
				possibleAttempts++
			}
		}
		if possibleAttempts < r.Attempts {
			valid = false
		}
	case "fibonacci":
		// Build the fibonacci progression of delays, capped at MaxDelay.
		var slots []time.Duration
		slots = append(slots, r.Delay)
		slots = append(slots, r.Delay)
		reachedCeiling := false
		for i := 2; i < r.Attempts; i++ {
			var nextDelay time.Duration
			if reachedCeiling {
				//switch to linear
				nextDelay = slots[i-1] + r.MaxDelay
			} else {
				nextDelay = slots[i-1] + slots[i-2]
				if nextDelay > r.MaxDelay {
					nextDelay = r.MaxDelay
					reachedCeiling = true
				}
			}
			slots = append(slots, nextDelay)
		}
		recommendedInterval = slots[len(slots)-1]
		if r.Interval < recommendedInterval {
			valid = false
			// calculate possible attempts
			for i := 0; i < len(slots); i++ {
				if slots[i] > r.Interval {
					possibleAttempts = i
					break
				}
			}
		}
	default:
		// Unknown delay function; the name itself is validated elsewhere.
		return false, 0, 0
	}
	if possibleAttempts < 0 { // can happen if delay is bigger than interval
		possibleAttempts = 0
	}
	return valid, possibleAttempts, recommendedInterval
}

// NewReschedulePolicy returns a copy of the default reschedule policy for the
// given job type, or nil for job types without a default.
func NewReschedulePolicy(jobType string) *ReschedulePolicy {
	switch jobType {
	case JobTypeService:
		rp := DefaultServiceJobReschedulePolicy
		return &rp
	case JobTypeBatch:
		rp := DefaultBatchJobReschedulePolicy
		return &rp
	}
	return nil
}

// Supported values for MigrateStrategy.HealthCheck.
const (
	MigrateStrategyHealthChecks = "checks"
	MigrateStrategyHealthStates = "task_states"
)

// MigrateStrategy describes the migration strategy applied to a task group
// (see TaskGroup.Migrate).
type MigrateStrategy struct {
	MaxParallel     int
	HealthCheck     string
	MinHealthyTime  time.Duration
	HealthyDeadline time.Duration
}

// DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations
// that lack an update strategy.
5851// 5852// This function should match its counterpart in api/tasks.go 5853func DefaultMigrateStrategy() *MigrateStrategy { 5854 return &MigrateStrategy{ 5855 MaxParallel: 1, 5856 HealthCheck: MigrateStrategyHealthChecks, 5857 MinHealthyTime: 10 * time.Second, 5858 HealthyDeadline: 5 * time.Minute, 5859 } 5860} 5861 5862func (m *MigrateStrategy) Validate() error { 5863 var mErr multierror.Error 5864 5865 if m.MaxParallel < 0 { 5866 _ = multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) 5867 } 5868 5869 switch m.HealthCheck { 5870 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: 5871 // ok 5872 case "": 5873 if m.MaxParallel > 0 { 5874 _ = multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) 5875 } 5876 default: 5877 _ = multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) 5878 } 5879 5880 if m.MinHealthyTime < 0 { 5881 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) 5882 } 5883 5884 if m.HealthyDeadline < 0 { 5885 _ = multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) 5886 } 5887 5888 if m.MinHealthyTime > m.HealthyDeadline { 5889 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) 5890 } 5891 5892 return mErr.ErrorOrNil() 5893} 5894 5895// TaskGroup is an atomic unit of placement. Each task group belongs to 5896// a job and may contain any number of tasks. A task group support running 5897// in many replicas using the same configuration.. 5898type TaskGroup struct { 5899 // Name of the task group 5900 Name string 5901 5902 // Count is the number of replicas of this task group that should 5903 // be scheduled. 
	Count int

	// Update is used to control the update strategy for this task group
	Update *UpdateStrategy

	// Migrate is used to control the migration strategy for this task group
	Migrate *MigrateStrategy

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// Scaling is the list of autoscaling policies for the TaskGroup
	Scaling *ScalingPolicy

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string

	// ReschedulePolicy is used to configure how the scheduler should
	// retry failed allocations.
	ReschedulePolicy *ReschedulePolicy

	// Affinities can be specified at the task group level to express
	// scheduling preferences.
	Affinities []*Affinity

	// Spreads can be specified at the task group level to express spreading
	// allocations across a desired attribute, such as datacenter
	Spreads []*Spread

	// Networks are the network configuration for the task group. This can be
	// overridden in the task.
	Networks Networks

	// Consul configuration specific to this task group
	Consul *Consul

	// Services this group provides
	Services []*Service

	// Volumes is a map of volumes that have been requested by the task group.
	Volumes map[string]*VolumeRequest

	// ShutdownDelay is the amount of time to wait between deregistering
	// group services in consul and stopping tasks.
	ShutdownDelay *time.Duration

	// StopAfterClientDisconnect, if set, configures the client to stop the task group
	// after this duration since the last known good heartbeat
	StopAfterClientDisconnect *time.Duration
}

// Copy returns a deep copy of the task group, or nil for a nil receiver.
func (tg *TaskGroup) Copy() *TaskGroup {
	if tg == nil {
		return nil
	}
	ntg := new(TaskGroup)
	*ntg = *tg
	ntg.Update = ntg.Update.Copy()
	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
	ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy()
	ntg.Affinities = CopySliceAffinities(ntg.Affinities)
	ntg.Spreads = CopySliceSpreads(ntg.Spreads)
	ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes)
	ntg.Scaling = ntg.Scaling.Copy()
	ntg.Consul = ntg.Consul.Copy()

	// Copy the network objects
	if tg.Networks != nil {
		n := len(tg.Networks)
		ntg.Networks = make([]*NetworkResource, n)
		for i := 0; i < n; i++ {
			ntg.Networks[i] = tg.Networks[i].Copy()
		}
	}

	if tg.Tasks != nil {
		tasks := make([]*Task, len(ntg.Tasks))
		for i, t := range ntg.Tasks {
			tasks[i] = t.Copy()
		}
		ntg.Tasks = tasks
	}

	ntg.Meta = helper.CopyMapStringString(ntg.Meta)

	if tg.EphemeralDisk != nil {
		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
	}

	if tg.Services != nil {
		ntg.Services = make([]*Service, len(tg.Services))
		for i, s := range tg.Services {
			ntg.Services[i] = s.Copy()
		}
	}

	// NOTE(review): the next two assignments are no-ops after the struct copy
	// above and leave the *time.Duration pointers shared with the source —
	// confirm the shallow copy of these fields is intended.
	if tg.ShutdownDelay != nil {
		ntg.ShutdownDelay = tg.ShutdownDelay
	}

	if tg.StopAfterClientDisconnect != nil {
		ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect
	}

	return ntg
}

// Canonicalize is used to canonicalize fields in the TaskGroup.
func (tg *TaskGroup) Canonicalize(job *Job) {
	// Ensure that an empty and nil map are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(tg.Meta) == 0 {
		tg.Meta = nil
	}

	// Set the default restart policy.
	if tg.RestartPolicy == nil {
		tg.RestartPolicy = NewRestartPolicy(job.Type)
	}

	// Set the default reschedule policy.
	if tg.ReschedulePolicy == nil {
		tg.ReschedulePolicy = NewReschedulePolicy(job.Type)
	}

	// Canonicalize Migrate for service jobs
	if job.Type == JobTypeService && tg.Migrate == nil {
		tg.Migrate = DefaultMigrateStrategy()
	}

	// Set a default ephemeral disk object if the user has not requested for one
	if tg.EphemeralDisk == nil {
		tg.EphemeralDisk = DefaultEphemeralDisk()
	}

	if tg.Scaling != nil {
		tg.Scaling.Canonicalize()
	}

	// Canonicalize nested services, networks, and tasks.
	for _, service := range tg.Services {
		service.Canonicalize(job.Name, tg.Name, "group")
	}

	for _, network := range tg.Networks {
		network.Canonicalize()
	}

	for _, task := range tg.Tasks {
		task.Canonicalize(job, tg)
	}
}

// Validate is used to check a task group for reasonable configuration
func (tg *TaskGroup) Validate(j *Job) error {
	var mErr multierror.Error
	if tg.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
	} else if strings.Contains(tg.Name, "\000") {
		mErr.Errors = append(mErr.Errors, errors.New("Task group name contains null character"))
	}
	if tg.Count < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
	}
	if len(tg.Tasks) == 0 {
		// could be a lone consul gateway inserted by the connect mutator
		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
	}

	// Validate the group-level constraints.
	for idx, constr := range tg.Constraints {
		if err := constr.Validate(); err != nil {
			outer :=
fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	// Affinities are rejected for system jobs, validated otherwise.
	if j.Type == JobTypeSystem {
		if tg.Affinities != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
		}
	} else {
		for idx, affinity := range tg.Affinities {
			if err := affinity.Validate(); err != nil {
				outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// A restart policy is required on every task group.
	if tg.RestartPolicy != nil {
		if err := tg.RestartPolicy.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
	}

	// Spreads are rejected for system jobs, validated otherwise.
	if j.Type == JobTypeSystem {
		if tg.Spreads != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
		}
	} else {
		for idx, spread := range tg.Spreads {
			if err := spread.Validate(); err != nil {
				outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// System jobs may not reschedule; all other job types require a policy.
	if j.Type == JobTypeSystem {
		if tg.ReschedulePolicy != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy"))
		}
	} else {
		if tg.ReschedulePolicy != nil {
			if err := tg.ReschedulePolicy.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		} else {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name))
		}
	}

	// An ephemeral disk object is required (Canonicalize fills a default).
	if tg.EphemeralDisk != nil {
		if err := tg.EphemeralDisk.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
	}

	// Validate the update strategy
	if u := tg.Update; u != nil {
		switch j.Type {
		case JobTypeService, JobTypeSystem:
		default:
			// Only service and system jobs may carry an update block.
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
		}
		if err := u.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Validate the migration strategy
	switch j.Type {
	case JobTypeService:
		if tg.Migrate != nil {
			if err := tg.Migrate.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	default:
		// Only service jobs may carry a migrate block.
		if tg.Migrate != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type))
		}
	}

	// Check that there is only one leader task if any
	tasks := make(map[string]int)
	leaderTasks := 0
	for idx, task := range tg.Tasks {
		if task.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
		} else if existing, ok := tasks[task.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
		} else {
			tasks[task.Name] = idx
		}

		if task.Leader {
			leaderTasks++
		}
	}

	if leaderTasks > 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
	}

	// Validate the volume requests
	var canaries int
	if tg.Update != nil {
		canaries = tg.Update.Canary
	}
	for name, volReq := range tg.Volumes {
		if err := volReq.Validate(canaries); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf(
				"Task group volume validation for %s failed: %v", name, err))
		}
	}

	// Validate task group and task network resources
	if err := tg.validateNetworks(); err != nil {
		outer := fmt.Errorf("Task group network validation failed: %v", err)
		mErr.Errors =
append(mErr.Errors, outer)
	}

	// Validate task group and task services
	if err := tg.validateServices(); err != nil {
		outer := fmt.Errorf("Task group service validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate group service script-checks
	if err := tg.validateScriptChecksInGroupServices(); err != nil {
		outer := fmt.Errorf("Task group service check validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate the scaling policy
	if err := tg.validateScalingPolicy(j); err != nil {
		outer := fmt.Errorf("Task group scaling policy validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate the tasks
	for _, task := range tg.Tasks {
		// Validate the task does not reference undefined volume mounts
		for i, mnt := range task.VolumeMounts {
			if mnt.Volume == "" {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i))
				continue
			}

			if _, ok := tg.Volumes[mnt.Volume]; !ok {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume))
				continue
			}
		}

		if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services, tg.Networks); err != nil {
			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	return mErr.ErrorOrNil()
}

// validateNetworks checks group- and task-level networks for duplicate port
// labels, duplicate or out-of-range static ports, and invalid port mappings.
func (tg *TaskGroup) validateNetworks() error {
	var mErr multierror.Error
	portLabels := make(map[string]string)
	// host_network -> static port tracking
	staticPortsIndex := make(map[string]map[int]string)

	for _, net := range tg.Networks {
		for _, port := range append(net.ReservedPorts, net.DynamicPorts...)
{
			if other, ok := portLabels[port.Label]; ok {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
			} else {
				portLabels[port.Label] = "taskgroup network"
			}

			if port.Value != 0 {
				// Track static ports per host network ("default" when unset)
				// so a static port may repeat across distinct host networks.
				hostNetwork := port.HostNetwork
				if hostNetwork == "" {
					hostNetwork = "default"
				}
				staticPorts, ok := staticPortsIndex[hostNetwork]
				if !ok {
					staticPorts = make(map[int]string)
				}
				// static port
				if other, ok := staticPorts[port.Value]; ok {
					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
					mErr.Errors = append(mErr.Errors, err)
				} else if port.Value > math.MaxUint16 {
					err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16)
					mErr.Errors = append(mErr.Errors, err)
				} else {
					staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label)
					staticPortsIndex[hostNetwork] = staticPorts
				}
			}

			// To of -1 is allowed; anything below that is rejected.
			if port.To < -1 {
				err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To)
				mErr.Errors = append(mErr.Errors, err)
			} else if port.To > math.MaxUint16 {
				err := fmt.Errorf("Port %q cannot be mapped to a port (%d) greater than %d", port.Label, port.To, math.MaxUint16)
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	}
	// Check for duplicate tasks or port labels, and no duplicated static ports
	for _, task := range tg.Tasks {
		if task.Resources == nil {
			continue
		}

		for _, net := range task.Resources.Networks {
			for _, port := range append(net.ReservedPorts, net.DynamicPorts...)
{
				if other, ok := portLabels[port.Label]; ok {
					mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
				}

				if port.Value != 0 {
					// Same per-host-network static port tracking as above,
					// shared with the group-level loop.
					hostNetwork := port.HostNetwork
					if hostNetwork == "" {
						hostNetwork = "default"
					}
					staticPorts, ok := staticPortsIndex[hostNetwork]
					if !ok {
						staticPorts = make(map[int]string)
					}
					if other, ok := staticPorts[port.Value]; ok {
						err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
						mErr.Errors = append(mErr.Errors, err)
					} else if port.Value > math.MaxUint16 {
						err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16)
						mErr.Errors = append(mErr.Errors, err)
					} else {
						staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
						staticPortsIndex[hostNetwork] = staticPorts
					}
				}
			}
		}
	}
	return mErr.ErrorOrNil()
}

// validateServices runs Service.Validate() on group-level services,
// checks that group services do not conflict with task services and that
// group service checks that refer to tasks only refer to tasks that exist.
6337func (tg *TaskGroup) validateServices() error { 6338 var mErr multierror.Error 6339 knownTasks := make(map[string]struct{}) 6340 knownServices := make(map[string]struct{}) 6341 6342 // Create a map of known tasks and their services so we can compare 6343 // vs the group-level services and checks 6344 for _, task := range tg.Tasks { 6345 knownTasks[task.Name] = struct{}{} 6346 if task.Services == nil { 6347 continue 6348 } 6349 for _, service := range task.Services { 6350 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6351 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name)) 6352 } 6353 for _, check := range service.Checks { 6354 if check.TaskName != "" { 6355 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s is invalid: only task group service checks can be assigned tasks", check.Name)) 6356 } 6357 } 6358 knownServices[service.Name+service.PortLabel] = struct{}{} 6359 } 6360 } 6361 for i, service := range tg.Services { 6362 if err := service.Validate(); err != nil { 6363 outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err) 6364 mErr.Errors = append(mErr.Errors, outer) 6365 // we break here to avoid the risk of crashing on null-pointer 6366 // access in a later step, accepting that we might miss out on 6367 // error messages to provide the user. 
6368 continue 6369 } 6370 if service.AddressMode == AddressModeDriver { 6371 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"driver\", only services defined in a \"task\" block can use this mode", service.Name)) 6372 } 6373 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6374 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name)) 6375 } 6376 knownServices[service.Name+service.PortLabel] = struct{}{} 6377 for _, check := range service.Checks { 6378 if check.TaskName != "" { 6379 if check.Type != ServiceCheckScript && check.Type != ServiceCheckGRPC { 6380 mErr.Errors = append(mErr.Errors, 6381 fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name)) 6382 } 6383 if check.AddressMode == AddressModeDriver { 6384 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %q invalid: cannot use address_mode=\"driver\", only checks defined in a \"task\" service block can use this mode", service.Name)) 6385 } 6386 if _, ok := knownTasks[check.TaskName]; !ok { 6387 mErr.Errors = append(mErr.Errors, 6388 fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName)) 6389 } 6390 } 6391 } 6392 } 6393 return mErr.ErrorOrNil() 6394} 6395 6396// validateScriptChecksInGroupServices ensures group-level services with script 6397// checks know what task driver to use. Either the service.task or service.check.task 6398// parameter must be configured. 
6399func (tg *TaskGroup) validateScriptChecksInGroupServices() error { 6400 var mErr multierror.Error 6401 for _, service := range tg.Services { 6402 if service.TaskName == "" { 6403 for _, check := range service.Checks { 6404 if check.Type == "script" && check.TaskName == "" { 6405 mErr.Errors = append(mErr.Errors, 6406 fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter", 6407 tg.Name, service.Name, check.Name, 6408 )) 6409 } 6410 } 6411 } 6412 } 6413 return mErr.ErrorOrNil() 6414} 6415 6416// validateScalingPolicy ensures that the scaling policy has consistent 6417// min and max, not in conflict with the task group count 6418func (tg *TaskGroup) validateScalingPolicy(j *Job) error { 6419 if tg.Scaling == nil { 6420 return nil 6421 } 6422 6423 var mErr multierror.Error 6424 6425 err := tg.Scaling.Validate() 6426 if err != nil { 6427 // prefix scaling policy errors 6428 if me, ok := err.(*multierror.Error); ok { 6429 for _, e := range me.Errors { 6430 mErr.Errors = append(mErr.Errors, fmt.Errorf("Scaling policy invalid: %s", e)) 6431 } 6432 } 6433 } 6434 6435 if tg.Scaling.Max < int64(tg.Count) { 6436 mErr.Errors = append(mErr.Errors, 6437 fmt.Errorf("Scaling policy invalid: task group count must not be greater than maximum count in scaling policy")) 6438 } 6439 6440 if int64(tg.Count) < tg.Scaling.Min && !(j.IsMultiregion() && tg.Count == 0 && j.Region == "global") { 6441 mErr.Errors = append(mErr.Errors, 6442 fmt.Errorf("Scaling policy invalid: task group count must not be less than minimum count in scaling policy")) 6443 } 6444 6445 return mErr.ErrorOrNil() 6446} 6447 6448// Warnings returns a list of warnings that may be from dubious settings or 6449// deprecation warnings. 
6450func (tg *TaskGroup) Warnings(j *Job) error { 6451 var mErr multierror.Error 6452 6453 // Validate the update strategy 6454 if u := tg.Update; u != nil { 6455 // Check the counts are appropriate 6456 if u.MaxParallel > tg.Count && !(j.IsMultiregion() && tg.Count == 0) { 6457 mErr.Errors = append(mErr.Errors, 6458 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 6459 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 6460 } 6461 } 6462 6463 // Check for mbits network field 6464 if len(tg.Networks) > 0 && tg.Networks[0].MBits > 0 { 6465 mErr.Errors = append(mErr.Errors, fmt.Errorf("mbits has been deprecated as of Nomad 0.12.0. Please remove mbits from the network block")) 6466 } 6467 6468 for _, t := range tg.Tasks { 6469 if err := t.Warnings(); err != nil { 6470 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 6471 mErr.Errors = append(mErr.Errors, err) 6472 } 6473 } 6474 6475 return mErr.ErrorOrNil() 6476} 6477 6478// LookupTask finds a task by name 6479func (tg *TaskGroup) LookupTask(name string) *Task { 6480 for _, t := range tg.Tasks { 6481 if t.Name == name { 6482 return t 6483 } 6484 } 6485 return nil 6486} 6487 6488// UsesConnect for convenience returns true if the TaskGroup contains at least 6489// one service that makes use of Consul Connect features. 6490// 6491// Currently used for validating that the task group contains one or more connect 6492// aware services before generating a service identity token. 
func (tg *TaskGroup) UsesConnect() bool {
	for _, service := range tg.Services {
		if service.Connect != nil {
			if service.Connect.IsNative() || service.Connect.HasSidecar() || service.Connect.IsGateway() {
				return true
			}
		}
	}
	return false
}

// UsesConnectGateway for convenience returns true if the TaskGroup contains at
// least one service that makes use of Consul Connect Gateway features.
func (tg *TaskGroup) UsesConnectGateway() bool {
	for _, service := range tg.Services {
		if service.Connect != nil {
			if service.Connect.IsGateway() {
				return true
			}
		}
	}
	return false
}

// GoString implements fmt.GoStringer for debug output of the task group.
func (tg *TaskGroup) GoString() string {
	return fmt.Sprintf("*%#v", *tg)
}

// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
	Limit          int           // Restart task after this many unhealthy intervals
	Grace          time.Duration // Grace time to give tasks after starting to get healthy
	IgnoreWarnings bool          // If true treat checks in `warning` as passing
}

// Copy returns a copy of the CheckRestart, or nil if the receiver is nil.
// All fields are value types, so a shallow copy is a full copy.
func (c *CheckRestart) Copy() *CheckRestart {
	if c == nil {
		return nil
	}

	nc := new(CheckRestart)
	*nc = *c
	return nc
}

// Equals reports whether two CheckRestart configs are field-for-field equal.
// Two nil receivers are equal; a nil and non-nil receiver are not.
func (c *CheckRestart) Equals(o *CheckRestart) bool {
	if c == nil || o == nil {
		return c == o
	}

	if c.Limit != o.Limit {
		return false
	}

	if c.Grace != o.Grace {
		return false
	}

	if c.IgnoreWarnings != o.IgnoreWarnings {
		return false
	}

	return true
}

// Validate checks that Limit and Grace are non-negative. A nil CheckRestart
// is valid (feature unused).
func (c *CheckRestart) Validate() error {
	if c == nil {
		return nil
	}

	var mErr multierror.Error
	if c.Limit < 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit))
	}

	if c.Grace < 0 {
		// NOTE(review): %d renders the time.Duration as raw nanoseconds —
		// confirm whether %v (human-readable) was intended here.
		mErr.Errors = append(mErr.Errors,
			fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace))
	}

	return mErr.ErrorOrNil()
}

const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it.
	DefaultKillTimeout = 5 * time.Second
)

// LogConfig provides configuration for log rotation
type LogConfig struct {
	MaxFiles      int // Maximum number of rotated log files to keep
	MaxFileSizeMB int // Maximum size of each log file in megabytes
}

// Equals reports whether two LogConfigs are field-for-field equal.
// Two nil receivers are equal; a nil and non-nil receiver are not.
func (l *LogConfig) Equals(o *LogConfig) bool {
	if l == nil || o == nil {
		return l == o
	}

	if l.MaxFiles != o.MaxFiles {
		return false
	}

	if l.MaxFileSizeMB != o.MaxFileSizeMB {
		return false
	}

	return true
}

// Copy returns a copy of the LogConfig, or nil if the receiver is nil.
func (l *LogConfig) Copy() *LogConfig {
	if l == nil {
		return nil
	}
	return &LogConfig{
		MaxFiles:      l.MaxFiles,
		MaxFileSizeMB: l.MaxFileSizeMB,
	}
}

// DefaultLogConfig returns the default LogConfig values.
func DefaultLogConfig() *LogConfig {
	return &LogConfig{
		MaxFiles:      10,
		MaxFileSizeMB: 10,
	}
}

// Validate returns an error if the log config specified are less than
// the minimum allowed.
func (l *LogConfig) Validate() error {
	var mErr multierror.Error
	if l.MaxFiles < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
	}
	if l.MaxFileSizeMB < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
	}
	return mErr.ErrorOrNil()
}

// Task is a single process typically that is executed as part of a task group.
type Task struct {
	// Name of the task
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Affinities can be specified at the task level to express
	// scheduling preferences
	Affinities []*Affinity

	// Resources is the resources needed by this task
	Resources *Resources

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Lifecycle is the task's optional lifecycle configuration.
	Lifecycle *TaskLifecycleConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool

	// ShutdownDelay is the duration of the delay between deregistering a
	// task from Consul and sending it a signal to shutdown. See #2441
	ShutdownDelay time.Duration

	// VolumeMounts is a list of Volume name <-> mount configurations that will be
	// attached to this task.
	VolumeMounts []*VolumeMount

	// ScalingPolicies is a list of scaling policies scoped to this task
	ScalingPolicies []*ScalingPolicy

	// KillSignal is the kill signal to use for the task. This is an optional
	// specification and defaults to SIGINT
	KillSignal string

	// Used internally to manage tasks according to their TaskKind. Initial use case
	// is for Consul Connect
	Kind TaskKind

	// CSIPluginConfig is used to configure the plugin supervisor for the task.
	CSIPluginConfig *TaskCSIPluginConfig
}

// UsesConnect is for conveniently detecting if the Task is able to make use
// of Consul Connect features. This will be indicated in the TaskKind of the
// Task, which exports known types of Tasks. UsesConnect will be true if the
// task is a connect proxy, connect native, or is a connect gateway.
6728func (t *Task) UsesConnect() bool { 6729 return t.Kind.IsConnectNative() || t.UsesConnectSidecar() 6730} 6731 6732func (t *Task) UsesConnectSidecar() bool { 6733 return t.Kind.IsConnectProxy() || t.Kind.IsAnyConnectGateway() 6734} 6735 6736func (t *Task) Copy() *Task { 6737 if t == nil { 6738 return nil 6739 } 6740 nt := new(Task) 6741 *nt = *t 6742 nt.Env = helper.CopyMapStringString(nt.Env) 6743 6744 if t.Services != nil { 6745 services := make([]*Service, len(nt.Services)) 6746 for i, s := range nt.Services { 6747 services[i] = s.Copy() 6748 } 6749 nt.Services = services 6750 } 6751 6752 nt.Constraints = CopySliceConstraints(nt.Constraints) 6753 nt.Affinities = CopySliceAffinities(nt.Affinities) 6754 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 6755 nt.CSIPluginConfig = nt.CSIPluginConfig.Copy() 6756 6757 nt.Vault = nt.Vault.Copy() 6758 nt.Resources = nt.Resources.Copy() 6759 nt.LogConfig = nt.LogConfig.Copy() 6760 nt.Meta = helper.CopyMapStringString(nt.Meta) 6761 nt.DispatchPayload = nt.DispatchPayload.Copy() 6762 nt.Lifecycle = nt.Lifecycle.Copy() 6763 6764 if t.Artifacts != nil { 6765 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 6766 for _, a := range nt.Artifacts { 6767 artifacts = append(artifacts, a.Copy()) 6768 } 6769 nt.Artifacts = artifacts 6770 } 6771 6772 if i, err := copystructure.Copy(nt.Config); err != nil { 6773 panic(err.Error()) 6774 } else { 6775 nt.Config = i.(map[string]interface{}) 6776 } 6777 6778 if t.Templates != nil { 6779 templates := make([]*Template, len(t.Templates)) 6780 for i, tmpl := range nt.Templates { 6781 templates[i] = tmpl.Copy() 6782 } 6783 nt.Templates = templates 6784 } 6785 6786 return nt 6787} 6788 6789// Canonicalize canonicalizes fields in the task. 6790func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 6791 // Ensure that an empty and nil map are treated the same to avoid scheduling 6792 // problems since we use reflect DeepEquals. 
6793 if len(t.Meta) == 0 { 6794 t.Meta = nil 6795 } 6796 if len(t.Config) == 0 { 6797 t.Config = nil 6798 } 6799 if len(t.Env) == 0 { 6800 t.Env = nil 6801 } 6802 6803 for _, service := range t.Services { 6804 service.Canonicalize(job.Name, tg.Name, t.Name) 6805 } 6806 6807 // If Resources are nil initialize them to defaults, otherwise canonicalize 6808 if t.Resources == nil { 6809 t.Resources = DefaultResources() 6810 } else { 6811 t.Resources.Canonicalize() 6812 } 6813 6814 if t.RestartPolicy == nil { 6815 t.RestartPolicy = tg.RestartPolicy 6816 } 6817 6818 // Set the default timeout if it is not specified. 6819 if t.KillTimeout == 0 { 6820 t.KillTimeout = DefaultKillTimeout 6821 } 6822 6823 if t.Vault != nil { 6824 t.Vault.Canonicalize() 6825 } 6826 6827 for _, template := range t.Templates { 6828 template.Canonicalize() 6829 } 6830} 6831 6832func (t *Task) GoString() string { 6833 return fmt.Sprintf("*%#v", *t) 6834} 6835 6836// Validate is used to check a task for reasonable configuration 6837func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service, tgNetworks Networks) error { 6838 var mErr multierror.Error 6839 if t.Name == "" { 6840 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 6841 } 6842 if strings.ContainsAny(t.Name, `/\`) { 6843 // We enforce this so that when creating the directory on disk it will 6844 // not have any slashes. 
6845 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 6846 } else if strings.Contains(t.Name, "\000") { 6847 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include null characters")) 6848 } 6849 if t.Driver == "" { 6850 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 6851 } 6852 if t.KillTimeout < 0 { 6853 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 6854 } 6855 if t.ShutdownDelay < 0 { 6856 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 6857 } 6858 6859 // Validate the resources. 6860 if t.Resources == nil { 6861 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 6862 } else if err := t.Resources.Validate(); err != nil { 6863 mErr.Errors = append(mErr.Errors, err) 6864 } 6865 6866 // Validate the log config 6867 if t.LogConfig == nil { 6868 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 6869 } else if err := t.LogConfig.Validate(); err != nil { 6870 mErr.Errors = append(mErr.Errors, err) 6871 } 6872 6873 for idx, constr := range t.Constraints { 6874 if err := constr.Validate(); err != nil { 6875 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 6876 mErr.Errors = append(mErr.Errors, outer) 6877 } 6878 6879 switch constr.Operand { 6880 case ConstraintDistinctHosts, ConstraintDistinctProperty: 6881 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 6882 mErr.Errors = append(mErr.Errors, outer) 6883 } 6884 } 6885 6886 if jobType == JobTypeSystem { 6887 if t.Affinities != nil { 6888 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 6889 } 6890 } else { 6891 for idx, affinity := range t.Affinities { 6892 if err := affinity.Validate(); err != nil { 6893 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 6894 mErr.Errors = append(mErr.Errors, outer) 
6895 } 6896 } 6897 } 6898 6899 // Validate Services 6900 if err := validateServices(t, tgNetworks); err != nil { 6901 mErr.Errors = append(mErr.Errors, err) 6902 } 6903 6904 if t.LogConfig != nil && ephemeralDisk != nil { 6905 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 6906 if ephemeralDisk.SizeMB <= logUsage { 6907 mErr.Errors = append(mErr.Errors, 6908 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 6909 logUsage, ephemeralDisk.SizeMB)) 6910 } 6911 } 6912 6913 for idx, artifact := range t.Artifacts { 6914 if err := artifact.Validate(); err != nil { 6915 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 6916 mErr.Errors = append(mErr.Errors, outer) 6917 } 6918 } 6919 6920 if t.Vault != nil { 6921 if err := t.Vault.Validate(); err != nil { 6922 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 6923 } 6924 } 6925 6926 destinations := make(map[string]int, len(t.Templates)) 6927 for idx, tmpl := range t.Templates { 6928 if err := tmpl.Validate(); err != nil { 6929 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 6930 mErr.Errors = append(mErr.Errors, outer) 6931 } 6932 6933 if other, ok := destinations[tmpl.DestPath]; ok { 6934 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 6935 mErr.Errors = append(mErr.Errors, outer) 6936 } else { 6937 destinations[tmpl.DestPath] = idx + 1 6938 } 6939 } 6940 6941 // Validate the dispatch payload block if there 6942 if t.DispatchPayload != nil { 6943 if err := t.DispatchPayload.Validate(); err != nil { 6944 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 6945 } 6946 } 6947 6948 // Validate the Lifecycle block if there 6949 if t.Lifecycle != nil { 6950 if err := t.Lifecycle.Validate(); err != nil { 6951 mErr.Errors = append(mErr.Errors, fmt.Errorf("Lifecycle validation failed: %v", err)) 6952 } 6953 6954 } 6955 6956 // Validation for 
TaskKind field which is used for Consul Connect integration 6957 if t.Kind.IsConnectProxy() { 6958 // This task is a Connect proxy so it should not have service stanzas 6959 if len(t.Services) > 0 { 6960 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza")) 6961 } 6962 if t.Leader { 6963 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) 6964 } 6965 6966 // Ensure the proxy task has a corresponding service entry 6967 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) 6968 if serviceErr != nil { 6969 mErr.Errors = append(mErr.Errors, serviceErr) 6970 } 6971 } 6972 6973 // Validation for volumes 6974 for idx, vm := range t.VolumeMounts { 6975 if !MountPropagationModeIsValid(vm.PropagationMode) { 6976 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode)) 6977 } 6978 } 6979 6980 // Validate CSI Plugin Config 6981 if t.CSIPluginConfig != nil { 6982 if t.CSIPluginConfig.ID == "" { 6983 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig must have a non-empty PluginID")) 6984 } 6985 6986 if !CSIPluginTypeIsValid(t.CSIPluginConfig.Type) { 6987 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig PluginType must be one of 'node', 'controller', or 'monolith', got: \"%s\"", t.CSIPluginConfig.Type)) 6988 } 6989 6990 // TODO: Investigate validation of the PluginMountDir. Not much we can do apart from check IsAbs until after we understand its execution environment though :( 6991 } 6992 6993 return mErr.ErrorOrNil() 6994} 6995 6996// validateServices takes a task and validates the services within it are valid 6997// and reference ports that exist. 6998func validateServices(t *Task, tgNetworks Networks) error { 6999 var mErr multierror.Error 7000 7001 // Ensure that services don't ask for nonexistent ports and their names are 7002 // unique. 
7003 servicePorts := make(map[string]map[string]struct{}) 7004 addServicePort := func(label, service string) { 7005 if _, ok := servicePorts[label]; !ok { 7006 servicePorts[label] = map[string]struct{}{} 7007 } 7008 servicePorts[label][service] = struct{}{} 7009 } 7010 knownServices := make(map[string]struct{}) 7011 for i, service := range t.Services { 7012 if err := service.Validate(); err != nil { 7013 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 7014 mErr.Errors = append(mErr.Errors, outer) 7015 } 7016 7017 if service.AddressMode == AddressModeAlloc { 7018 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"alloc\", only services defined in a \"group\" block can use this mode", service.Name)) 7019 } 7020 7021 // Ensure that services with the same name are not being registered for 7022 // the same port 7023 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 7024 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 7025 } 7026 knownServices[service.Name+service.PortLabel] = struct{}{} 7027 7028 if service.PortLabel != "" { 7029 if service.AddressMode == "driver" { 7030 // Numeric port labels are valid for address_mode=driver 7031 _, err := strconv.Atoi(service.PortLabel) 7032 if err != nil { 7033 // Not a numeric port label, add it to list to check 7034 addServicePort(service.PortLabel, service.Name) 7035 } 7036 } else { 7037 addServicePort(service.PortLabel, service.Name) 7038 } 7039 } 7040 7041 // connect block is only allowed on group level 7042 if service.Connect != nil { 7043 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot have \"connect\" block, only services defined in a \"group\" block can", service.Name)) 7044 } 7045 7046 // Ensure that check names are unique and have valid ports 7047 knownChecks := make(map[string]struct{}) 7048 for _, check := range service.Checks { 7049 if _, ok := knownChecks[check.Name]; ok { 7050 
mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 7051 } 7052 knownChecks[check.Name] = struct{}{} 7053 7054 if check.AddressMode == AddressModeAlloc { 7055 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q cannot use address_mode=\"alloc\", only checks defined in a \"group\" service block can use this mode", service.Name)) 7056 } 7057 7058 if !check.RequiresPort() { 7059 // No need to continue validating check if it doesn't need a port 7060 continue 7061 } 7062 7063 effectivePort := check.PortLabel 7064 if effectivePort == "" { 7065 // Inherits from service 7066 effectivePort = service.PortLabel 7067 } 7068 7069 if effectivePort == "" { 7070 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 7071 continue 7072 } 7073 7074 isNumeric := false 7075 portNumber, err := strconv.Atoi(effectivePort) 7076 if err == nil { 7077 isNumeric = true 7078 } 7079 7080 // Numeric ports are fine for address_mode = "driver" 7081 if check.AddressMode == "driver" && isNumeric { 7082 if portNumber <= 0 { 7083 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 7084 } 7085 continue 7086 } 7087 7088 if isNumeric { 7089 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 7090 continue 7091 } 7092 7093 // PortLabel must exist, report errors by its parent service 7094 addServicePort(effectivePort, service.Name) 7095 } 7096 } 7097 7098 // Get the set of group port labels. 7099 portLabels := make(map[string]struct{}) 7100 if len(tgNetworks) > 0 { 7101 ports := tgNetworks[0].PortLabels() 7102 for portLabel := range ports { 7103 portLabels[portLabel] = struct{}{} 7104 } 7105 } 7106 7107 // COMPAT(0.13) 7108 // Append the set of task port labels. 
(Note that network resources on the 7109 // task resources are deprecated, but we must let them continue working; a 7110 // warning will be emitted on job submission). 7111 if t.Resources != nil { 7112 for _, network := range t.Resources.Networks { 7113 for portLabel := range network.PortLabels() { 7114 portLabels[portLabel] = struct{}{} 7115 } 7116 } 7117 } 7118 7119 // Iterate over a sorted list of keys to make error listings stable 7120 keys := make([]string, 0, len(servicePorts)) 7121 for p := range servicePorts { 7122 keys = append(keys, p) 7123 } 7124 sort.Strings(keys) 7125 7126 // Ensure all ports referenced in services exist. 7127 for _, servicePort := range keys { 7128 services := servicePorts[servicePort] 7129 _, ok := portLabels[servicePort] 7130 if !ok { 7131 names := make([]string, 0, len(services)) 7132 for name := range services { 7133 names = append(names, name) 7134 } 7135 7136 // Keep order deterministic 7137 sort.Strings(names) 7138 joined := strings.Join(names, ", ") 7139 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 7140 mErr.Errors = append(mErr.Errors, err) 7141 } 7142 } 7143 7144 // Ensure address mode is valid 7145 return mErr.ErrorOrNil() 7146} 7147 7148func (t *Task) Warnings() error { 7149 var mErr multierror.Error 7150 7151 // Validate the resources 7152 if t.Resources != nil && t.Resources.IOPS != 0 { 7153 mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza.")) 7154 } 7155 7156 if t.Resources != nil && len(t.Resources.Networks) != 0 { 7157 mErr.Errors = append(mErr.Errors, fmt.Errorf("task network resources have been deprecated as of Nomad 0.12.0. 
Please configure networking via group network block.")) 7158 } 7159 7160 for idx, tmpl := range t.Templates { 7161 if err := tmpl.Warnings(); err != nil { 7162 err = multierror.Prefix(err, fmt.Sprintf("Template[%d]", idx)) 7163 mErr.Errors = append(mErr.Errors, err) 7164 } 7165 } 7166 7167 return mErr.ErrorOrNil() 7168} 7169 7170// TaskKind identifies the special kinds of tasks using the following format: 7171// '<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that 7172// is opaque to the Task. This identifier can be used to relate the task to some 7173// other entity based on the kind. 7174// 7175// For example, a task may have the TaskKind of `connect-proxy:service` where 7176// 'connect-proxy' is the kind name and 'service' is the identifier that relates the 7177// task to the service name of which it is a connect proxy for. 7178type TaskKind string 7179 7180func NewTaskKind(name, identifier string) TaskKind { 7181 return TaskKind(fmt.Sprintf("%s:%s", name, identifier)) 7182} 7183 7184// Name returns the kind name portion of the TaskKind 7185func (k TaskKind) Name() string { 7186 return strings.Split(string(k), ":")[0] 7187} 7188 7189// Value returns the identifier of the TaskKind or an empty string if it doesn't 7190// include one. 7191func (k TaskKind) Value() string { 7192 if s := strings.SplitN(string(k), ":", 2); len(s) > 1 { 7193 return s[1] 7194 } 7195 return "" 7196} 7197 7198func (k TaskKind) hasPrefix(prefix string) bool { 7199 return strings.HasPrefix(string(k), prefix+":") && len(k) > len(prefix)+1 7200} 7201 7202// IsConnectProxy returns true if the TaskKind is connect-proxy. 7203func (k TaskKind) IsConnectProxy() bool { 7204 return k.hasPrefix(ConnectProxyPrefix) 7205} 7206 7207// IsConnectNative returns true if the TaskKind is connect-native. 
7208func (k TaskKind) IsConnectNative() bool { 7209 return k.hasPrefix(ConnectNativePrefix) 7210} 7211 7212func (k TaskKind) IsConnectIngress() bool { 7213 return k.hasPrefix(ConnectIngressPrefix) 7214} 7215 7216func (k TaskKind) IsConnectTerminating() bool { 7217 return k.hasPrefix(ConnectTerminatingPrefix) 7218} 7219 7220func (k TaskKind) IsAnyConnectGateway() bool { 7221 switch { 7222 case k.IsConnectIngress(): 7223 return true 7224 case k.IsConnectTerminating(): 7225 return true 7226 default: 7227 return false 7228 } 7229} 7230 7231const ( 7232 // ConnectProxyPrefix is the prefix used for fields referencing a Consul Connect 7233 // Proxy 7234 ConnectProxyPrefix = "connect-proxy" 7235 7236 // ConnectNativePrefix is the prefix used for fields referencing a Connect 7237 // Native Task 7238 ConnectNativePrefix = "connect-native" 7239 7240 // ConnectIngressPrefix is the prefix used for fields referencing a Consul 7241 // Connect Ingress Gateway Proxy. 7242 ConnectIngressPrefix = "connect-ingress" 7243 7244 // ConnectTerminatingPrefix is the prefix used for fields referencing a Consul 7245 // Connect Terminating Gateway Proxy. 7246 // 7247 ConnectTerminatingPrefix = "connect-terminating" 7248 7249 // ConnectMeshPrefix is the prefix used for fields referencing a Consul Connect 7250 // Mesh Gateway Proxy. 7251 // 7252 // Not yet supported. 7253 // ConnectMeshPrefix = "connect-mesh" 7254) 7255 7256// ValidateConnectProxyService checks that the service that is being 7257// proxied by this task exists in the task group and contains 7258// valid Connect config. 
7259func ValidateConnectProxyService(serviceName string, tgServices []*Service) error { 7260 found := false 7261 names := make([]string, 0, len(tgServices)) 7262 for _, svc := range tgServices { 7263 if svc.Connect == nil || svc.Connect.SidecarService == nil { 7264 continue 7265 } 7266 7267 if svc.Name == serviceName { 7268 found = true 7269 break 7270 } 7271 7272 // Build up list of mismatched Connect service names for error 7273 // reporting. 7274 names = append(names, svc.Name) 7275 } 7276 7277 if !found { 7278 if len(names) == 0 { 7279 return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName) 7280 } else { 7281 return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names) 7282 } 7283 } 7284 7285 return nil 7286} 7287 7288const ( 7289 // TemplateChangeModeNoop marks that no action should be taken if the 7290 // template is re-rendered 7291 TemplateChangeModeNoop = "noop" 7292 7293 // TemplateChangeModeSignal marks that the task should be signaled if the 7294 // template is re-rendered 7295 TemplateChangeModeSignal = "signal" 7296 7297 // TemplateChangeModeRestart marks that the task should be restarted if the 7298 // template is re-rendered 7299 TemplateChangeModeRestart = "restart" 7300) 7301 7302var ( 7303 // TemplateChangeModeInvalidError is the error for when an invalid change 7304 // mode is given 7305 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 7306) 7307 7308// Template represents a template configuration to be rendered for a given task 7309type Template struct { 7310 // SourcePath is the path to the template to be rendered 7311 SourcePath string 7312 7313 // DestPath is the path to where the template should be rendered 7314 DestPath string 7315 7316 // EmbeddedTmpl store the raw template. 
This is useful for smaller templates 7317 // where they are embedded in the job file rather than sent as an artifact 7318 EmbeddedTmpl string 7319 7320 // ChangeMode indicates what should be done if the template is re-rendered 7321 ChangeMode string 7322 7323 // ChangeSignal is the signal that should be sent if the change mode 7324 // requires it. 7325 ChangeSignal string 7326 7327 // Splay is used to avoid coordinated restarts of processes by applying a 7328 // random wait between 0 and the given splay value before signalling the 7329 // application of a change 7330 Splay time.Duration 7331 7332 // Perms is the permission the file should be written out with. 7333 Perms string 7334 7335 // LeftDelim and RightDelim are optional configurations to control what 7336 // delimiter is utilized when parsing the template. 7337 LeftDelim string 7338 RightDelim string 7339 7340 // Envvars enables exposing the template as environment variables 7341 // instead of as a file. The template must be of the form: 7342 // 7343 // VAR_NAME_1={{ key service/my-key }} 7344 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 7345 // 7346 // Lines will be split on the initial "=" with the first part being the 7347 // key name and the second part the value. 7348 // Empty lines and lines starting with # will be ignored, but to avoid 7349 // escaping issues #s within lines will not be treated as comments. 7350 Envvars bool 7351 7352 // VaultGrace is the grace duration between lease renewal and reacquiring a 7353 // secret. If the lease of a secret is less than the grace, a new secret is 7354 // acquired. 7355 // COMPAT(0.12) VaultGrace has been ignored by Vault since Vault v0.5. 7356 VaultGrace time.Duration 7357} 7358 7359// DefaultTemplate returns a default template. 
// DefaultTemplate returns a new Template with the package defaults applied:
// restart on change, a 5s splay, and 0644 destination file permissions.
func DefaultTemplate() *Template {
	return &Template{
		ChangeMode: TemplateChangeModeRestart,
		Splay:      5 * time.Second,
		Perms:      "0644",
	}
}

// Copy returns a shallow copy of the template; nil in, nil out.
func (t *Template) Copy() *Template {
	if t == nil {
		return nil
	}
	copy := new(Template)
	*copy = *t
	return copy
}

// Canonicalize normalizes the template in place. Currently it only
// upper-cases ChangeSignal (e.g. "sighup" -> "SIGHUP") so later signal
// comparisons are case-insensitive.
func (t *Template) Canonicalize() {
	if t.ChangeSignal != "" {
		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
	}
}

// Validate checks the template for user errors and returns them all
// accumulated in a single multierror: a source (path or embedded text) and a
// destination are required, the destination must not escape the allocation
// directory, the change mode must be recognized (signal mode requires a
// signal and is incompatible with env var templates), the splay must be
// non-negative, and Perms, when set, must parse as an octal file mode.
func (t *Template) Validate() error {
	var mErr multierror.Error

	// Verify we have something to render
	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
	}

	// Verify we can render somewhere
	if t.DestPath == "" {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
	}

	// Verify the destination doesn't escape the allocation directory
	escaped, err := PathEscapesAllocDir("task", t.DestPath)
	if err != nil {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
	} else if escaped {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
	}

	// Verify a proper change mode
	switch t.ChangeMode {
	case TemplateChangeModeNoop, TemplateChangeModeRestart:
	case TemplateChangeModeSignal:
		if t.ChangeSignal == "" {
			_ = multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
		}
		if t.Envvars {
			_ = multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
		}
	default:
		_ = multierror.Append(&mErr, TemplateChangeModeInvalidError)
	}

	// Verify the splay is positive
	if t.Splay < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
	}

	// Verify the permissions parse as octal. The 12-bit width admits the
	// setuid/setgid/sticky bits on top of the rwxrwxrwx bits.
	if t.Perms != "" {
		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
			_ = multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
		}
	}

	return mErr.ErrorOrNil()
}

// Warnings returns non-fatal advisories for the template; currently only a
// deprecation notice for VaultGrace.
func (t *Template) Warnings() error {
	var mErr multierror.Error

	// Deprecation notice for vault_grace
	if t.VaultGrace != 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("VaultGrace has been deprecated as of Nomad 0.11 and ignored since Vault 0.5. Please remove VaultGrace / vault_grace from template stanza."))
	}

	return mErr.ErrorOrNil()
}

// AllocStateField identifies which allocation-level field an AllocState
// event changed.
type AllocStateField uint8

const (
	AllocStateFieldClientStatus AllocStateField = iota
)

// AllocState records a single event that changes the state of the whole
// allocation: the field that changed, its new value, and when it changed.
type AllocState struct {
	Field AllocStateField
	Value string
	Time  time.Time
}

// TaskHandle is optional handle to a task propagated to the servers for use
// by remote tasks. Since remote tasks are not implicitly lost when the node
// they are assigned to is down, their state is migrated to the replacement
// allocation.
//
// Minimal set of fields from plugins/drivers/task_handle.go:TaskHandle
type TaskHandle struct {
	// Version of driver state. Used by the driver to gracefully handle
	// plugin upgrades.
	Version int

	// Driver-specific state containing a handle to the remote task.
	DriverState []byte
}

// Copy returns a deep copy of the handle (DriverState bytes included);
// nil in, nil out.
func (h *TaskHandle) Copy() *TaskHandle {
	if h == nil {
		return nil
	}

	newTH := TaskHandle{
		Version:     h.Version,
		DriverState: make([]byte, len(h.DriverState)),
	}
	copy(newTH.DriverState, h.DriverState)
	return &newTH
}

// Set of possible states for a task.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Failed marks a task as having failed
	Failed bool

	// Restarts is the number of times the task has restarted
	Restarts uint64

	// LastRestart is the time the task last restarted. It is updated each time the
	// task restarts
	LastRestart time.Time

	// StartedAt is the time the task is started. It is updated each time the
	// task starts
	StartedAt time.Time

	// FinishedAt is the time at which the task transitioned to dead and will
	// not be started again.
	FinishedAt time.Time

	// Series of task events that transition the state of the task.
	Events []*TaskEvent

	// Experimental - TaskHandle is based on drivers.TaskHandle and used
	// by remote task drivers to migrate task handles between allocations.
	TaskHandle *TaskHandle
}

// NewTaskState returns a TaskState initialized in the Pending state.
func NewTaskState() *TaskState {
	return &TaskState{
		State: TaskStatePending,
	}
}

// Canonicalize ensures the TaskState has a State set. It should default to
// Pending.
func (ts *TaskState) Canonicalize() {
	if ts.State == "" {
		ts.State = TaskStatePending
	}
}

// Copy returns a copy of the task state with the Events slice and the
// TaskHandle deep-copied; nil in, nil out.
func (ts *TaskState) Copy() *TaskState {
	if ts == nil {
		return nil
	}
	newTS := new(TaskState)
	*newTS = *ts

	if ts.Events != nil {
		newTS.Events = make([]*TaskEvent, len(ts.Events))
		for i, e := range ts.Events {
			newTS.Events[i] = e.Copy()
		}
	}

	newTS.TaskHandle = ts.TaskHandle.Copy()
	return newTS
}

// Successful returns whether a task finished successfully. This doesn't really
// have meaning on a non-batch allocation because a service and system
// allocation should not finish.
func (ts *TaskState) Successful() bool {
	return ts.State == TaskStateDead && !ts.Failed
}

// Set of possible task event types.
const (
	// TaskSetupFailure indicates that the task could not be started due to a
	// setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver. TaskDriverFailure is considered Recoverable.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not run.
	// TaskFailedValidation is not considered Recoverable.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the task group
	// has finished.
	TaskLeaderDead = "Leader Task Dead"

	// TaskMainDead indicates that the main tasks have died.
	TaskMainDead = "Main Tasks Dead"

	// TaskHookFailed indicates that one of the hooks for a task failed.
	TaskHookFailed = "Task hook failed"

	// TaskRestoreFailed indicates Nomad was unable to reattach to a
	// restored task.
	TaskRestoreFailed = "Failed Restoring Task"

	// TaskPluginUnhealthy indicates that a plugin managed by Nomad became unhealthy
	TaskPluginUnhealthy = "Plugin became unhealthy"

	// TaskPluginHealthy indicates that a plugin managed by Nomad became healthy
	TaskPluginHealthy = "Plugin became healthy"
)

// TaskEvent is an event that effects the state of a task and contains meta-data
// appropriate to the events type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	Message string // A possible message explaining the termination of the task.

	// DisplayMessage is a human friendly message about the event
	DisplayMessage string

	// Details is a map with annotated info about the event
	Details map[string]string

	// DEPRECATION NOTICE: The following fields are deprecated and will be removed
	// in a future release. Field values are available in the Details map.

	// FailsTask marks whether this event fails the task.
	// Deprecated, use Details["fails_task"] to access this.
	FailsTask bool

	// Restart fields.
	// Deprecated, use Details["restart_reason"] to access this.
	RestartReason string

	// Setup Failure fields.
	// Deprecated, use Details["setup_error"] to access this.
	SetupError string

	// Driver Failure fields.
	// Deprecated, use Details["driver_error"] to access this.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.

	// Deprecated, use Details["exit_code"] to access this.
	ExitCode int // The exit code of the task.

	// Deprecated, use Details["signal"] to access this.
	Signal int // The signal that terminated the task.

	// Killing fields
	// Deprecated, use Details["kill_timeout"] to access this.
	KillTimeout time.Duration

	// Task Killed Fields.
	// Deprecated, use Details["kill_error"] to access this.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	// Deprecated, use Details["kill_reason"] to access this.
	KillReason string

	// TaskRestarting fields.
	// Deprecated, use Details["start_delay"] to access this.
	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.

	// Artifact Download fields
	// Deprecated, use Details["download_error"] to access this.
	DownloadError string // Error downloading artifacts

	// Validation fields
	// Deprecated, use Details["validation_error"] to access this.
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	// Deprecated, use Details["disk_limit"] to access this.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	// Deprecated, use Details["failed_sibling"] to access this.
	FailedSibling string

	// VaultError is the error from token renewal
	// Deprecated, use Details["vault_renewal_error"] to access this.
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	// Deprecated, use Details["task_signal_reason"] to access this.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	// Deprecated, use Details["task_signal"] to access this.
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	// Deprecated, use Details["driver_message"] to access this.
	DriverMessage string

	// GenericSource is the source of a message.
	// Deprecated, is redundant with event type.
	GenericSource string
}
7702 // Deprecated, use Details["kill_error"] to access this. 7703 KillError string // Error killing the task. 7704 7705 // KillReason is the reason the task was killed 7706 // Deprecated, use Details["kill_reason"] to access this. 7707 KillReason string 7708 7709 // TaskRestarting fields. 7710 // Deprecated, use Details["start_delay"] to access this. 7711 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 7712 7713 // Artifact Download fields 7714 // Deprecated, use Details["download_error"] to access this. 7715 DownloadError string // Error downloading artifacts 7716 7717 // Validation fields 7718 // Deprecated, use Details["validation_error"] to access this. 7719 ValidationError string // Validation error 7720 7721 // The maximum allowed task disk size. 7722 // Deprecated, use Details["disk_limit"] to access this. 7723 DiskLimit int64 7724 7725 // Name of the sibling task that caused termination of the task that 7726 // the TaskEvent refers to. 7727 // Deprecated, use Details["failed_sibling"] to access this. 7728 FailedSibling string 7729 7730 // VaultError is the error from token renewal 7731 // Deprecated, use Details["vault_renewal_error"] to access this. 7732 VaultError string 7733 7734 // TaskSignalReason indicates the reason the task is being signalled. 7735 // Deprecated, use Details["task_signal_reason"] to access this. 7736 TaskSignalReason string 7737 7738 // TaskSignal is the signal that was sent to the task 7739 // Deprecated, use Details["task_signal"] to access this. 7740 TaskSignal string 7741 7742 // DriverMessage indicates a driver action being taken. 7743 // Deprecated, use Details["driver_message"] to access this. 7744 DriverMessage string 7745 7746 // GenericSource is the source of a message. 7747 // Deprecated, is redundant with event type. 7748 GenericSource string 7749} 7750 7751func (event *TaskEvent) PopulateEventDisplayMessage() { 7752 // Build up the description based on the event type. 
7753 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 7754 return 7755 } 7756 7757 if event.DisplayMessage != "" { 7758 return 7759 } 7760 7761 var desc string 7762 switch event.Type { 7763 case TaskSetup: 7764 desc = event.Message 7765 case TaskStarted: 7766 desc = "Task started by client" 7767 case TaskReceived: 7768 desc = "Task received by client" 7769 case TaskFailedValidation: 7770 if event.ValidationError != "" { 7771 desc = event.ValidationError 7772 } else { 7773 desc = "Validation of task failed" 7774 } 7775 case TaskSetupFailure: 7776 if event.SetupError != "" { 7777 desc = event.SetupError 7778 } else { 7779 desc = "Task setup failed" 7780 } 7781 case TaskDriverFailure: 7782 if event.DriverError != "" { 7783 desc = event.DriverError 7784 } else { 7785 desc = "Failed to start task" 7786 } 7787 case TaskDownloadingArtifacts: 7788 desc = "Client is downloading artifacts" 7789 case TaskArtifactDownloadFailed: 7790 if event.DownloadError != "" { 7791 desc = event.DownloadError 7792 } else { 7793 desc = "Failed to download artifacts" 7794 } 7795 case TaskKilling: 7796 if event.KillReason != "" { 7797 desc = event.KillReason 7798 } else if event.KillTimeout != 0 { 7799 desc = fmt.Sprintf("Sent interrupt. 
Waiting %v before force killing", event.KillTimeout) 7800 } else { 7801 desc = "Sent interrupt" 7802 } 7803 case TaskKilled: 7804 if event.KillError != "" { 7805 desc = event.KillError 7806 } else { 7807 desc = "Task successfully killed" 7808 } 7809 case TaskTerminated: 7810 var parts []string 7811 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 7812 7813 if event.Signal != 0 { 7814 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 7815 } 7816 7817 if event.Message != "" { 7818 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 7819 } 7820 desc = strings.Join(parts, ", ") 7821 case TaskRestarting: 7822 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 7823 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 7824 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 7825 } else { 7826 desc = in 7827 } 7828 case TaskNotRestarting: 7829 if event.RestartReason != "" { 7830 desc = event.RestartReason 7831 } else { 7832 desc = "Task exceeded restart policy" 7833 } 7834 case TaskSiblingFailed: 7835 if event.FailedSibling != "" { 7836 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 7837 } else { 7838 desc = "Task's sibling failed" 7839 } 7840 case TaskSignaling: 7841 sig := event.TaskSignal 7842 reason := event.TaskSignalReason 7843 7844 if sig == "" && reason == "" { 7845 desc = "Task being sent a signal" 7846 } else if sig == "" { 7847 desc = reason 7848 } else if reason == "" { 7849 desc = fmt.Sprintf("Task being sent signal %v", sig) 7850 } else { 7851 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 7852 } 7853 case TaskRestartSignal: 7854 if event.RestartReason != "" { 7855 desc = event.RestartReason 7856 } else { 7857 desc = "Task signaled to restart" 7858 } 7859 case TaskDriverMessage: 7860 desc = event.DriverMessage 7861 case TaskLeaderDead: 7862 desc = "Leader Task in Group dead" 7863 case TaskMainDead: 7864 desc = "Main tasks 
in the group died" 7865 default: 7866 desc = event.Message 7867 } 7868 7869 event.DisplayMessage = desc 7870} 7871 7872func (te *TaskEvent) GoString() string { 7873 return fmt.Sprintf("%v - %v", te.Time, te.Type) 7874} 7875 7876// SetDisplayMessage sets the display message of TaskEvent 7877func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 7878 te.DisplayMessage = msg 7879 return te 7880} 7881 7882// SetMessage sets the message of TaskEvent 7883func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 7884 te.Message = msg 7885 te.Details["message"] = msg 7886 return te 7887} 7888 7889func (te *TaskEvent) Copy() *TaskEvent { 7890 if te == nil { 7891 return nil 7892 } 7893 copy := new(TaskEvent) 7894 *copy = *te 7895 return copy 7896} 7897 7898func NewTaskEvent(event string) *TaskEvent { 7899 return &TaskEvent{ 7900 Type: event, 7901 Time: time.Now().UnixNano(), 7902 Details: make(map[string]string), 7903 } 7904} 7905 7906// SetSetupError is used to store an error that occurred while setting up the 7907// task 7908func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 7909 if err != nil { 7910 e.SetupError = err.Error() 7911 e.Details["setup_error"] = err.Error() 7912 } 7913 return e 7914} 7915 7916func (e *TaskEvent) SetFailsTask() *TaskEvent { 7917 e.FailsTask = true 7918 e.Details["fails_task"] = "true" 7919 return e 7920} 7921 7922func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 7923 if err != nil { 7924 e.DriverError = err.Error() 7925 e.Details["driver_error"] = err.Error() 7926 } 7927 return e 7928} 7929 7930func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 7931 e.ExitCode = c 7932 e.Details["exit_code"] = fmt.Sprintf("%d", c) 7933 return e 7934} 7935 7936func (e *TaskEvent) SetSignal(s int) *TaskEvent { 7937 e.Signal = s 7938 e.Details["signal"] = fmt.Sprintf("%d", s) 7939 return e 7940} 7941 7942func (e *TaskEvent) SetSignalText(s string) *TaskEvent { 7943 e.Details["signal"] = s 7944 return e 7945} 7946 7947func (e *TaskEvent) 
// SetKillError records the error that occurred while killing the task;
// no-op on a nil error.
func (e *TaskEvent) SetKillError(err error) *TaskEvent {
	if err != nil {
		e.KillError = err.Error()
		e.Details["kill_error"] = err.Error()
	}
	return e
}

// SetKillReason records why the task was killed.
func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
	e.KillReason = r
	e.Details["kill_reason"] = r
	return e
}

// SetRestartDelay records the sleep period before the task restarts.
// The Details value is the raw nanosecond count, matching StartDelay.
func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
	e.StartDelay = int64(delay)
	e.Details["start_delay"] = fmt.Sprintf("%d", delay)
	return e
}

// SetRestartReason records why the task is restarting.
func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
	e.RestartReason = reason
	e.Details["restart_reason"] = reason
	return e
}

// SetTaskSignalReason records why the task is being signalled.
func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
	e.TaskSignalReason = r
	e.Details["task_signal_reason"] = r
	return e
}

// SetTaskSignal records the signal sent to the task (by name).
func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
	e.TaskSignal = s.String()
	e.Details["task_signal"] = s.String()
	return e
}

// SetDownloadError records an artifact download error; no-op on nil.
func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
	if err != nil {
		e.DownloadError = err.Error()
		e.Details["download_error"] = err.Error()
	}
	return e
}

// SetValidationError records a task validation error; no-op on nil.
func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
	if err != nil {
		e.ValidationError = err.Error()
		e.Details["validation_error"] = err.Error()
	}
	return e
}

// SetKillTimeout records how long the client waits before force-killing.
func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
	e.KillTimeout = timeout
	e.Details["kill_timeout"] = timeout.String()
	return e
}

// SetDiskLimit records the maximum allowed task disk size.
func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
	e.DiskLimit = limit
	e.Details["disk_limit"] = fmt.Sprintf("%d", limit)
	return e
}

// SetFailedSibling records the name of the sibling task whose failure
// caused this event.
func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
	e.FailedSibling = sibling
	e.Details["failed_sibling"] = sibling
	return e
}

// SetVaultRenewalError records a Vault token renewal error; no-op on nil.
func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
	if err != nil {
		e.VaultError = err.Error()
		e.Details["vault_renewal_error"] = err.Error()
	}
	return e
}

// SetDriverMessage records an informational message from the driver.
func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
	e.DriverMessage = m
	e.Details["driver_message"] = m
	return e
}

// SetOOMKilled records (in Details only) whether the task was OOM killed.
func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent {
	e.Details["oom_killed"] = strconv.FormatBool(oom)
	return e
}

// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter.
	GetterOptions map[string]string

	// GetterHeaders are headers to use when downloading the artifact using
	// go-getter.
	GetterHeaders map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}

// Copy returns a copy of the artifact with the option and header maps
// deep-copied; nil in, nil out.
func (ta *TaskArtifact) Copy() *TaskArtifact {
	if ta == nil {
		return nil
	}
	return &TaskArtifact{
		GetterSource:  ta.GetterSource,
		GetterOptions: helper.CopyMapStringString(ta.GetterOptions),
		GetterHeaders: helper.CopyMapStringString(ta.GetterHeaders),
		GetterMode:    ta.GetterMode,
		RelativeDest:  ta.RelativeDest,
	}
}

// GoString renders the artifact with field names for debugging output.
func (ta *TaskArtifact) GoString() string {
	return fmt.Sprintf("%+v", ta)
}
8086func hashStringMap(h hash.Hash, m map[string]string) { 8087 keys := make([]string, 0, len(m)) 8088 for k := range m { 8089 keys = append(keys, k) 8090 } 8091 sort.Strings(keys) 8092 for _, k := range keys { 8093 _, _ = h.Write([]byte(k)) 8094 _, _ = h.Write([]byte(m[k])) 8095 } 8096} 8097 8098// Hash creates a unique identifier for a TaskArtifact as the same GetterSource 8099// may be specified multiple times with different destinations. 8100func (ta *TaskArtifact) Hash() string { 8101 h, err := blake2b.New256(nil) 8102 if err != nil { 8103 panic(err) 8104 } 8105 8106 _, _ = h.Write([]byte(ta.GetterSource)) 8107 8108 hashStringMap(h, ta.GetterOptions) 8109 hashStringMap(h, ta.GetterHeaders) 8110 8111 _, _ = h.Write([]byte(ta.GetterMode)) 8112 _, _ = h.Write([]byte(ta.RelativeDest)) 8113 return base64.RawStdEncoding.EncodeToString(h.Sum(nil)) 8114} 8115 8116// PathEscapesAllocDir returns if the given path escapes the allocation 8117// directory. 8118// 8119// The prefix is to joined to the path (e.g. "task/local"), and this function 8120// checks if path escapes the alloc dir, NOT the prefix directory within the alloc dir. 
8121// With prefix="task/local", it will return false for "../secret", but 8122// true for "../../../../../../root" path; only the latter escapes the alloc dir 8123func PathEscapesAllocDir(prefix, path string) (bool, error) { 8124 // Verify the destination doesn't escape the tasks directory 8125 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 8126 if err != nil { 8127 return false, err 8128 } 8129 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 8130 if err != nil { 8131 return false, err 8132 } 8133 rel, err := filepath.Rel(alloc, abs) 8134 if err != nil { 8135 return false, err 8136 } 8137 8138 return strings.HasPrefix(rel, ".."), nil 8139} 8140 8141func (ta *TaskArtifact) Validate() error { 8142 // Verify the source 8143 var mErr multierror.Error 8144 if ta.GetterSource == "" { 8145 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 8146 } 8147 8148 switch ta.GetterMode { 8149 case "": 8150 // Default to any 8151 ta.GetterMode = GetterModeAny 8152 case GetterModeAny, GetterModeFile, GetterModeDir: 8153 // Ok 8154 default: 8155 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 8156 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 8157 } 8158 8159 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 8160 if err != nil { 8161 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 8162 } else if escaped { 8163 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 8164 } 8165 8166 if err := ta.validateChecksum(); err != nil { 8167 mErr.Errors = append(mErr.Errors, err) 8168 } 8169 8170 return mErr.ErrorOrNil() 8171} 8172 8173func (ta *TaskArtifact) validateChecksum() error { 8174 check, ok := ta.GetterOptions["checksum"] 8175 if !ok { 8176 return nil 8177 } 8178 8179 // Job struct validation occurs before interpolation resolution can be effective. 
8180 // Skip checking if checksum contain variable reference, and artifacts fetching will 8181 // eventually fail, if checksum is indeed invalid. 8182 if args.ContainsEnv(check) { 8183 return nil 8184 } 8185 8186 check = strings.TrimSpace(check) 8187 if check == "" { 8188 return fmt.Errorf("checksum value cannot be empty") 8189 } 8190 8191 parts := strings.Split(check, ":") 8192 if l := len(parts); l != 2 { 8193 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check) 8194 } 8195 8196 checksumVal := parts[1] 8197 checksumBytes, err := hex.DecodeString(checksumVal) 8198 if err != nil { 8199 return fmt.Errorf("invalid checksum: %v", err) 8200 } 8201 8202 checksumType := parts[0] 8203 expectedLength := 0 8204 switch checksumType { 8205 case "md5": 8206 expectedLength = md5.Size 8207 case "sha1": 8208 expectedLength = sha1.Size 8209 case "sha256": 8210 expectedLength = sha256.Size 8211 case "sha512": 8212 expectedLength = sha512.Size 8213 default: 8214 return fmt.Errorf("unsupported checksum type: %s", checksumType) 8215 } 8216 8217 if len(checksumBytes) != expectedLength { 8218 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal) 8219 } 8220 8221 return nil 8222} 8223 8224const ( 8225 ConstraintDistinctProperty = "distinct_property" 8226 ConstraintDistinctHosts = "distinct_hosts" 8227 ConstraintRegex = "regexp" 8228 ConstraintVersion = "version" 8229 ConstraintSemver = "semver" 8230 ConstraintSetContains = "set_contains" 8231 ConstraintSetContainsAll = "set_contains_all" 8232 ConstraintSetContainsAny = "set_contains_any" 8233 ConstraintAttributeIsSet = "is_set" 8234 ConstraintAttributeIsNotSet = "is_not_set" 8235) 8236 8237// Constraints are used to restrict placement options. 
// Constraints are used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	str     string // Memoized string
}

// Equals checks if two constraints are equal. Note: both receiver and
// argument are dereferenced, so a non-nil constraint compared against nil
// will panic; pointer equality short-circuits the nil/nil case.
func (c *Constraint) Equals(o *Constraint) bool {
	return c == o ||
		c.LTarget == o.LTarget &&
			c.RTarget == o.RTarget &&
			c.Operand == o.Operand
}

// Equal is an alias for Equals.
func (c *Constraint) Equal(o *Constraint) bool {
	return c.Equals(o)
}

// Copy returns a shallow copy of the constraint; nil in, nil out.
func (c *Constraint) Copy() *Constraint {
	if c == nil {
		return nil
	}
	nc := new(Constraint)
	*nc = *c
	return nc
}

// String renders "LTarget Operand RTarget", memoizing the result in c.str.
func (c *Constraint) String() string {
	if c.str != "" {
		return c.str
	}
	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
	return c.str
}

// Validate checks the constraint for errors, accumulated into a multierror:
// an operand is required, each operand's RTarget requirements are enforced
// (regexp/version/semver must compile or parse, distinct_property counts
// must be uints >= 1, is_set/is_not_set forbid an RTarget, comparison
// operators require one), and an LTarget is required for every operand
// except distinct_hosts.
func (c *Constraint) Validate() error {
	var mErr multierror.Error
	if c.Operand == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
	}

	// requireLtarget specifies whether the constraint requires an LTarget to be
	// provided.
	requireLtarget := true

	// Perform additional validation based on operand
	switch c.Operand {
	case ConstraintDistinctHosts:
		requireLtarget = false
	case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains:
		if c.RTarget == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget"))
		}
	case ConstraintRegex:
		if _, err := regexp.Compile(c.RTarget); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
		}
	case ConstraintVersion:
		if _, err := version.NewConstraint(c.RTarget); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
		}
	case ConstraintSemver:
		if _, err := semver.NewConstraint(c.RTarget); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err))
		}
	case ConstraintDistinctProperty:
		// If a count is set, make sure it is convertible to a uint64
		if c.RTarget != "" {
			count, err := strconv.ParseUint(c.RTarget, 10, 64)
			if err != nil {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err))
			} else if count < 1 {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count))
			}
		}
	case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet:
		if c.RTarget != "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand))
		}
	case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=":
		if c.RTarget == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand))
		}
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand))
	}

	// Ensure we have an LTarget for the constraints that need one
	if requireLtarget && c.LTarget == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint"))
	}

	return mErr.ErrorOrNil()
}

type Constraints []*Constraint

// Equals compares Constraints as a set: order does not matter, and (since
// this is a subset-style scan over equal-length slices) duplicate entries
// on either side can satisfy multiple entries on the other.
func (xs *Constraints) Equals(ys *Constraints) bool {
	if xs == ys {
		return true
	}
	if xs == nil || ys == nil {
		return false
	}
	if len(*xs) != len(*ys) {
		return false
	}
SETEQUALS:
	for _, x := range *xs {
		for _, y := range *ys {
			if x.Equals(y) {
				continue SETEQUALS
			}
		}
		return false
	}
	return true
}

// Affinity is used to score placement options based on a weight
type Affinity struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any
	Weight  int8   // Weight applied to nodes that match the affinity. Can be negative
	str     string // Memoized string
}
Can be negative 8365 str string // Memoized string 8366} 8367 8368// Equal checks if two affinities are equal 8369func (a *Affinity) Equals(o *Affinity) bool { 8370 return a == o || 8371 a.LTarget == o.LTarget && 8372 a.RTarget == o.RTarget && 8373 a.Operand == o.Operand && 8374 a.Weight == o.Weight 8375} 8376 8377func (a *Affinity) Equal(o *Affinity) bool { 8378 return a.Equals(o) 8379} 8380 8381func (a *Affinity) Copy() *Affinity { 8382 if a == nil { 8383 return nil 8384 } 8385 na := new(Affinity) 8386 *na = *a 8387 return na 8388} 8389 8390func (a *Affinity) String() string { 8391 if a.str != "" { 8392 return a.str 8393 } 8394 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 8395 return a.str 8396} 8397 8398func (a *Affinity) Validate() error { 8399 var mErr multierror.Error 8400 if a.Operand == "" { 8401 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 8402 } 8403 8404 // Perform additional validation based on operand 8405 switch a.Operand { 8406 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 8407 if a.RTarget == "" { 8408 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 8409 } 8410 case ConstraintRegex: 8411 if _, err := regexp.Compile(a.RTarget); err != nil { 8412 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 8413 } 8414 case ConstraintVersion: 8415 if _, err := version.NewConstraint(a.RTarget); err != nil { 8416 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 8417 } 8418 case ConstraintSemver: 8419 if _, err := semver.NewConstraint(a.RTarget); err != nil { 8420 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err)) 8421 } 8422 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 8423 if a.RTarget == "" { 8424 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 8425 } 8426 
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand))
	}

	// Ensure we have an LTarget
	if a.LTarget == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required"))
	}

	// Ensure that weight is between -100 and 100, and not zero
	if a.Weight == 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero"))
	}

	if a.Weight > 100 || a.Weight < -100 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]"))
	}

	return mErr.ErrorOrNil()
}

// Spread is used to specify desired distribution of allocations according to weight
type Spread struct {
	// Attribute is the node attribute used as the spread criteria
	Attribute string

	// Weight is the relative weight of this spread, useful when there are
	// multiple spreads and affinities
	Weight int8

	// SpreadTarget is used to describe desired percentages for each attribute value
	SpreadTarget []*SpreadTarget

	// Memoized string representation
	str string
}

type Affinities []*Affinity

// Equals compares Affinities as a set: order does not matter, and every
// element of xs must match some element of ys (sizes must agree). O(n^2),
// acceptable for the short lists found in job specs.
func (xs *Affinities) Equals(ys *Affinities) bool {
	if xs == ys {
		return true
	}
	if xs == nil || ys == nil {
		return false
	}
	if len(*xs) != len(*ys) {
		return false
	}
SETEQUALS:
	// For each x, scan ys for a match; the labeled continue skips to the
	// next x as soon as one is found.
	for _, x := range *xs {
		for _, y := range *ys {
			if x.Equals(y) {
				continue SETEQUALS
			}
		}
		return false
	}
	return true
}

// Copy returns a copy of the spread with its own SpreadTarget slice; nil in,
// nil out.
func (s *Spread) Copy() *Spread {
	if s == nil {
		return nil
	}
	ns := new(Spread)
	*ns = *s

	ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget)
	return ns
}

// String returns a memoized human readable form of the spread.
// NOTE(review): memoization writes s.str without synchronization — assumes
// single-goroutine access; confirm before sharing spreads across goroutines.
func (s *Spread) String() string {
	if s.str != "" {
		return s.str
	}
	s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight)
	return s.str
}

// Validate checks the spread attribute, weight range, and that targets are
// unique with percentages that individually and in sum do not exceed 100.
func (s *Spread) Validate() error {
	var mErr multierror.Error
	if s.Attribute == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute"))
	}
	// NOTE(review): weight 0 is rejected here but the message says
	// "from 0 to 100" — consider aligning message and check.
	if s.Weight <= 0 || s.Weight > 100 {
		mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100"))
	}
	seen := make(map[string]struct{})
	// Accumulate in uint32 so the sum of many uint8 percents cannot overflow.
	sumPercent := uint32(0)

	for _, target := range s.SpreadTarget {
		// Make sure there are no duplicates
		_, ok := seen[target.Value]
		if !ok {
			seen[target.Value] = struct{}{}
		} else {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value))
		}
		if target.Percent > 100 {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value))
		}
		sumPercent += uint32(target.Percent)
	}
	if sumPercent > 100 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent))
	}
	return mErr.ErrorOrNil()
}

// SpreadTarget is used to specify desired percentages for each attribute value
type SpreadTarget struct {
	// Value is a single attribute value, like "dc1"
	Value string

	// Percent is the desired percentage of allocs
	Percent uint8

	// Memoized string representation
	str string
}

// Copy returns a shallow copy of the spread target; nil in, nil out.
func (s *SpreadTarget) Copy() *SpreadTarget {
	if s == nil {
		return nil
	}

	ns := new(SpreadTarget)
	*ns = *s
	return ns
}

// String returns a memoized human readable form of the target, e.g. `"dc1" 50%`.
func (s *SpreadTarget) String() string {
	if s.str != "" {
		return s.str
	}
	s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent)
	return s.str
}

// EphemeralDisk is an ephemeral disk object
type EphemeralDisk struct {
8569 // Sticky indicates whether the allocation is sticky to a node 8570 Sticky bool 8571 8572 // SizeMB is the size of the local disk 8573 SizeMB int 8574 8575 // Migrate determines if Nomad client should migrate the allocation dir for 8576 // sticky allocations 8577 Migrate bool 8578} 8579 8580// DefaultEphemeralDisk returns a EphemeralDisk with default configurations 8581func DefaultEphemeralDisk() *EphemeralDisk { 8582 return &EphemeralDisk{ 8583 SizeMB: 300, 8584 } 8585} 8586 8587// Validate validates EphemeralDisk 8588func (d *EphemeralDisk) Validate() error { 8589 if d.SizeMB < 10 { 8590 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 8591 } 8592 return nil 8593} 8594 8595// Copy copies the EphemeralDisk struct and returns a new one 8596func (d *EphemeralDisk) Copy() *EphemeralDisk { 8597 ld := new(EphemeralDisk) 8598 *ld = *d 8599 return ld 8600} 8601 8602var ( 8603 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 8604 // server 8605 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 8606) 8607 8608const ( 8609 // VaultChangeModeNoop takes no action when a new token is retrieved. 8610 VaultChangeModeNoop = "noop" 8611 8612 // VaultChangeModeSignal signals the task when a new token is retrieved. 8613 VaultChangeModeSignal = "signal" 8614 8615 // VaultChangeModeRestart restarts the task when a new token is retrieved. 8616 VaultChangeModeRestart = "restart" 8617) 8618 8619// Vault stores the set of permissions a task needs access to from Vault. 8620type Vault struct { 8621 // Policies is the set of policies that the task needs access to 8622 Policies []string 8623 8624 // Namespace is the vault namespace that should be used. 
	Namespace string

	// Env marks whether the Vault Token should be exposed as an environment
	// variable
	Env bool

	// ChangeMode is used to configure the task's behavior when the Vault
	// token changes because the original token could not be renewed in time.
	ChangeMode string

	// ChangeSignal is the signal sent to the task when a new token is
	// retrieved. This is only valid when using the signal change mode.
	ChangeSignal string
}

// DefaultVaultBlock returns a Vault block with the default values: token
// exposed via the environment and restart-on-change.
func DefaultVaultBlock() *Vault {
	return &Vault{
		Env:        true,
		ChangeMode: VaultChangeModeRestart,
	}
}

// Copy returns a copy of this Vault block.
func (v *Vault) Copy() *Vault {
	if v == nil {
		return nil
	}

	nv := new(Vault)
	*nv = *v
	return nv
}

// Canonicalize normalizes the block; signal names are upper-cased so that
// comparisons elsewhere are case-insensitive.
func (v *Vault) Canonicalize() {
	if v.ChangeSignal != "" {
		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
	}
}

// Validate returns if the Vault block is valid.
8665func (v *Vault) Validate() error { 8666 if v == nil { 8667 return nil 8668 } 8669 8670 var mErr multierror.Error 8671 if len(v.Policies) == 0 { 8672 _ = multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 8673 } 8674 8675 for _, p := range v.Policies { 8676 if p == "root" { 8677 _ = multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 8678 } 8679 } 8680 8681 switch v.ChangeMode { 8682 case VaultChangeModeSignal: 8683 if v.ChangeSignal == "" { 8684 _ = multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 8685 } 8686 case VaultChangeModeNoop, VaultChangeModeRestart: 8687 default: 8688 _ = multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 8689 } 8690 8691 return mErr.ErrorOrNil() 8692} 8693 8694const ( 8695 // DeploymentStatuses are the various states a deployment can be be in 8696 DeploymentStatusRunning = "running" 8697 DeploymentStatusPaused = "paused" 8698 DeploymentStatusFailed = "failed" 8699 DeploymentStatusSuccessful = "successful" 8700 DeploymentStatusCancelled = "cancelled" 8701 DeploymentStatusPending = "pending" 8702 DeploymentStatusBlocked = "blocked" 8703 DeploymentStatusUnblocking = "unblocking" 8704 8705 // TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag 8706 8707 // DeploymentStatusDescriptions are the various descriptions of the states a 8708 // deployment can be in. 
8709 DeploymentStatusDescriptionRunning = "Deployment is running" 8710 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion" 8711 DeploymentStatusDescriptionRunningAutoPromotion = "Deployment is running pending automatic promotion" 8712 DeploymentStatusDescriptionPaused = "Deployment is paused" 8713 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 8714 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 8715 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 8716 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 8717 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 8718 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 8719 8720 // used only in multiregion deployments 8721 DeploymentStatusDescriptionFailedByPeer = "Failed because of an error in peer region" 8722 DeploymentStatusDescriptionBlocked = "Deployment is complete but waiting for peer region" 8723 DeploymentStatusDescriptionUnblocking = "Deployment is unblocking remaining regions" 8724 DeploymentStatusDescriptionPendingForPeer = "Deployment is pending, waiting for peer region" 8725) 8726 8727// DeploymentStatusDescriptionRollback is used to get the status description of 8728// a deployment when rolling back to an older job. 
8729func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 8730 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 8731} 8732 8733// DeploymentStatusDescriptionRollbackNoop is used to get the status description of 8734// a deployment when rolling back is not possible because it has the same specification 8735func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 8736 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 8737} 8738 8739// DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 8740// a deployment when there is no target to rollback to but autorevert is desired. 8741func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 8742 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 8743} 8744 8745// Deployment is the object that represents a job deployment which is used to 8746// transition a job between versions. 8747type Deployment struct { 8748 // ID is a generated UUID for the deployment 8749 ID string 8750 8751 // Namespace is the namespace the deployment is created in 8752 Namespace string 8753 8754 // JobID is the job the deployment is created for 8755 JobID string 8756 8757 // JobVersion is the version of the job at which the deployment is tracking 8758 JobVersion uint64 8759 8760 // JobModifyIndex is the ModifyIndex of the job which the deployment is 8761 // tracking. 8762 JobModifyIndex uint64 8763 8764 // JobSpecModifyIndex is the JobModifyIndex of the job which the 8765 // deployment is tracking. 8766 JobSpecModifyIndex uint64 8767 8768 // JobCreateIndex is the create index of the job which the deployment is 8769 // tracking. It is needed so that if the job gets stopped and reran we can 8770 // present the correct list of deployments for the job and not old ones. 
8771 JobCreateIndex uint64 8772 8773 // Multiregion specifies if deployment is part of multiregion deployment 8774 IsMultiregion bool 8775 8776 // TaskGroups is the set of task groups effected by the deployment and their 8777 // current deployment status. 8778 TaskGroups map[string]*DeploymentState 8779 8780 // The status of the deployment 8781 Status string 8782 8783 // StatusDescription allows a human readable description of the deployment 8784 // status. 8785 StatusDescription string 8786 8787 CreateIndex uint64 8788 ModifyIndex uint64 8789} 8790 8791// NewDeployment creates a new deployment given the job. 8792func NewDeployment(job *Job) *Deployment { 8793 return &Deployment{ 8794 ID: uuid.Generate(), 8795 Namespace: job.Namespace, 8796 JobID: job.ID, 8797 JobVersion: job.Version, 8798 JobModifyIndex: job.ModifyIndex, 8799 JobSpecModifyIndex: job.JobModifyIndex, 8800 JobCreateIndex: job.CreateIndex, 8801 IsMultiregion: job.IsMultiregion(), 8802 Status: DeploymentStatusRunning, 8803 StatusDescription: DeploymentStatusDescriptionRunning, 8804 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 8805 } 8806} 8807 8808func (d *Deployment) Copy() *Deployment { 8809 if d == nil { 8810 return nil 8811 } 8812 8813 c := &Deployment{} 8814 *c = *d 8815 8816 c.TaskGroups = nil 8817 if l := len(d.TaskGroups); d.TaskGroups != nil { 8818 c.TaskGroups = make(map[string]*DeploymentState, l) 8819 for tg, s := range d.TaskGroups { 8820 c.TaskGroups[tg] = s.Copy() 8821 } 8822 } 8823 8824 return c 8825} 8826 8827// Active returns whether the deployment is active or terminal. 
8828func (d *Deployment) Active() bool { 8829 switch d.Status { 8830 case DeploymentStatusRunning, DeploymentStatusPaused, DeploymentStatusBlocked, DeploymentStatusUnblocking, DeploymentStatusPending: 8831 return true 8832 default: 8833 return false 8834 } 8835} 8836 8837// GetID is a helper for getting the ID when the object may be nil 8838func (d *Deployment) GetID() string { 8839 if d == nil { 8840 return "" 8841 } 8842 return d.ID 8843} 8844 8845// HasPlacedCanaries returns whether the deployment has placed canaries 8846func (d *Deployment) HasPlacedCanaries() bool { 8847 if d == nil || len(d.TaskGroups) == 0 { 8848 return false 8849 } 8850 for _, group := range d.TaskGroups { 8851 if len(group.PlacedCanaries) != 0 { 8852 return true 8853 } 8854 } 8855 return false 8856} 8857 8858// RequiresPromotion returns whether the deployment requires promotion to 8859// continue 8860func (d *Deployment) RequiresPromotion() bool { 8861 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 8862 return false 8863 } 8864 for _, group := range d.TaskGroups { 8865 if group.DesiredCanaries > 0 && !group.Promoted { 8866 return true 8867 } 8868 } 8869 return false 8870} 8871 8872// HasAutoPromote determines if all taskgroups are marked auto_promote 8873func (d *Deployment) HasAutoPromote() bool { 8874 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 8875 return false 8876 } 8877 for _, group := range d.TaskGroups { 8878 if !group.AutoPromote { 8879 return false 8880 } 8881 } 8882 return true 8883} 8884 8885func (d *Deployment) GoString() string { 8886 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 8887 for group, state := range d.TaskGroups { 8888 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 8889 } 8890 return base 8891} 8892 8893// DeploymentState tracks the state of a deployment for a given task group. 
8894type DeploymentState struct { 8895 // AutoRevert marks whether the task group has indicated the job should be 8896 // reverted on failure 8897 AutoRevert bool 8898 8899 // AutoPromote marks promotion triggered automatically by healthy canaries 8900 // copied from TaskGroup UpdateStrategy in scheduler.reconcile 8901 AutoPromote bool 8902 8903 // ProgressDeadline is the deadline by which an allocation must transition 8904 // to healthy before the deployment is considered failed. This value is set 8905 // by the jobspec `update.progress_deadline` field. 8906 ProgressDeadline time.Duration 8907 8908 // RequireProgressBy is the time by which an allocation must transition to 8909 // healthy before the deployment is considered failed. This value is reset 8910 // to "now" + ProgressDeadline when an allocation updates the deployment. 8911 RequireProgressBy time.Time 8912 8913 // Promoted marks whether the canaries have been promoted 8914 Promoted bool 8915 8916 // PlacedCanaries is the set of placed canary allocations 8917 PlacedCanaries []string 8918 8919 // DesiredCanaries is the number of canaries that should be created. 8920 DesiredCanaries int 8921 8922 // DesiredTotal is the total number of allocations that should be created as 8923 // part of the deployment. 8924 DesiredTotal int 8925 8926 // PlacedAllocs is the number of allocations that have been placed 8927 PlacedAllocs int 8928 8929 // HealthyAllocs is the number of allocations that have been marked healthy. 8930 HealthyAllocs int 8931 8932 // UnhealthyAllocs are allocations that have been marked as unhealthy. 
8933 UnhealthyAllocs int 8934} 8935 8936func (d *DeploymentState) GoString() string { 8937 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 8938 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 8939 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 8940 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 8941 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 8942 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 8943 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 8944 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 8945 base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote) 8946 return base 8947} 8948 8949func (d *DeploymentState) Copy() *DeploymentState { 8950 c := &DeploymentState{} 8951 *c = *d 8952 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 8953 return c 8954} 8955 8956// DeploymentStatusUpdate is used to update the status of a given deployment 8957type DeploymentStatusUpdate struct { 8958 // DeploymentID is the ID of the deployment to update 8959 DeploymentID string 8960 8961 // Status is the new status of the deployment. 8962 Status string 8963 8964 // StatusDescription is the new status description of the deployment. 
8965 StatusDescription string 8966} 8967 8968// RescheduleTracker encapsulates previous reschedule events 8969type RescheduleTracker struct { 8970 Events []*RescheduleEvent 8971} 8972 8973func (rt *RescheduleTracker) Copy() *RescheduleTracker { 8974 if rt == nil { 8975 return nil 8976 } 8977 nt := &RescheduleTracker{} 8978 *nt = *rt 8979 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 8980 for _, tracker := range rt.Events { 8981 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 8982 } 8983 nt.Events = rescheduleEvents 8984 return nt 8985} 8986 8987// RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 8988type RescheduleEvent struct { 8989 // RescheduleTime is the timestamp of a reschedule attempt 8990 RescheduleTime int64 8991 8992 // PrevAllocID is the ID of the previous allocation being restarted 8993 PrevAllocID string 8994 8995 // PrevNodeID is the node ID of the previous allocation 8996 PrevNodeID string 8997 8998 // Delay is the reschedule delay associated with the attempt 8999 Delay time.Duration 9000} 9001 9002func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 9003 return &RescheduleEvent{RescheduleTime: rescheduleTime, 9004 PrevAllocID: prevAllocID, 9005 PrevNodeID: prevNodeID, 9006 Delay: delay} 9007} 9008 9009func (re *RescheduleEvent) Copy() *RescheduleEvent { 9010 if re == nil { 9011 return nil 9012 } 9013 copy := new(RescheduleEvent) 9014 *copy = *re 9015 return copy 9016} 9017 9018// DesiredTransition is used to mark an allocation as having a desired state 9019// transition. This information can be used by the scheduler to make the 9020// correct decision. 9021type DesiredTransition struct { 9022 // Migrate is used to indicate that this allocation should be stopped and 9023 // migrated to another node. 
9024 Migrate *bool 9025 9026 // Reschedule is used to indicate that this allocation is eligible to be 9027 // rescheduled. Most allocations are automatically eligible for 9028 // rescheduling, so this field is only required when an allocation is not 9029 // automatically eligible. An example is an allocation that is part of a 9030 // deployment. 9031 Reschedule *bool 9032 9033 // ForceReschedule is used to indicate that this allocation must be rescheduled. 9034 // This field is only used when operators want to force a placement even if 9035 // a failed allocation is not eligible to be rescheduled 9036 ForceReschedule *bool 9037} 9038 9039// Merge merges the two desired transitions, preferring the values from the 9040// passed in object. 9041func (d *DesiredTransition) Merge(o *DesiredTransition) { 9042 if o.Migrate != nil { 9043 d.Migrate = o.Migrate 9044 } 9045 9046 if o.Reschedule != nil { 9047 d.Reschedule = o.Reschedule 9048 } 9049 9050 if o.ForceReschedule != nil { 9051 d.ForceReschedule = o.ForceReschedule 9052 } 9053} 9054 9055// ShouldMigrate returns whether the transition object dictates a migration. 9056func (d *DesiredTransition) ShouldMigrate() bool { 9057 return d.Migrate != nil && *d.Migrate 9058} 9059 9060// ShouldReschedule returns whether the transition object dictates a 9061// rescheduling. 9062func (d *DesiredTransition) ShouldReschedule() bool { 9063 return d.Reschedule != nil && *d.Reschedule 9064} 9065 9066// ShouldForceReschedule returns whether the transition object dictates a 9067// forced rescheduling. 
func (d *DesiredTransition) ShouldForceReschedule() bool {
	if d == nil {
		return false
	}
	return d.ForceReschedule != nil && *d.ForceReschedule
}

const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)

const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)

// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// ID of the allocation (UUID)
	ID string

	// Namespace is the namespace the allocation is created in
	Namespace string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// NodeName is the name of the node this is being placed on.
	NodeName string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// COMPAT(0.11): Remove in 0.11
	// Resources is the total set of resources allocated as part
	// of this allocation of the task group. Dynamic ports will be set by
	// the scheduler.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	// Deprecated: use AllocatedResources.Shared instead.
	// Keep field to allow us to handle upgrade paths from old versions
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources. Dynamic ports will be
	// set by the scheduler.
	// Deprecated: use AllocatedResources.Tasks instead.
	// Keep field to allow us to handle upgrade paths from old versions
	TaskResources map[string]*Resources

	// AllocatedResources is the total resources allocated for the task group.
	AllocatedResources *AllocatedResources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// DesiredStatus is the desired status of the allocation on the client
	// (one of the AllocDesiredStatus* constants).
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status.
	DesiredDescription string

	// DesiredTransition is used to indicate that a state transition
	// is desired for a given reason.
	DesiredTransition DesiredTransition

	// ClientStatus is the status of the allocation on the client (one of the
	// AllocClientStatus* constants).
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status.
	ClientDescription string

	// TaskStates stores the state of each task,
	TaskStates map[string]*TaskState

	// AllocStates track meta data associated with changes to the state of the whole allocation, like becoming lost
	AllocStates []*AllocState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// NextAllocation is the allocation that this allocation is being replaced by
	NextAllocation string

	// DeploymentID identifies an allocation as being created from a
	// particular deployment
	DeploymentID string

	// DeploymentStatus captures the status of the allocation as part of the
	// given deployment
	DeploymentStatus *AllocDeploymentStatus

	// RescheduleTracker captures details of previous reschedule attempts of the allocation
	RescheduleTracker *RescheduleTracker

	// NetworkStatus captures networking details of an allocation known at runtime
	NetworkStatus *AllocNetworkStatus

	// FollowupEvalID captures a follow up evaluation created to handle a failed allocation
	// that can be rescheduled in the future
	FollowupEvalID string

	// PreemptedAllocations captures IDs of any allocations that were preempted
	// in order to place this allocation
	PreemptedAllocations []string

	// PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation
	// to stop running because it got preempted
	PreemptedByAllocation string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64

	// ModifyTime is the time the allocation was last updated.
	ModifyTime int64
}

// ConsulNamespace returns the Consul namespace of the task group associated
// with this allocation.
// NOTE(review): assumes a.Job is non-nil and the task group exists — a
// missing group would nil-deref here; confirm callers guarantee this.
func (a *Allocation) ConsulNamespace() string {
	return a.Job.LookupTaskGroup(a.TaskGroup).Consul.GetNamespace()
}

// JobNamespacedID returns the namespaced ID of the allocation's job.
func (a *Allocation) JobNamespacedID() NamespacedID {
	return NewNamespacedID(a.JobID, a.Namespace)
}

// Index returns the index of the allocation. If the allocation is from a task
// group with count greater than 1, there will be multiple allocations for it.
func (a *Allocation) Index() uint {
	// Names look like "<jobID>.<taskGroup>[<idx>]"; prefix accounts for the
	// '.' separator and the '[' opener.
	l := len(a.Name)
	prefix := len(a.JobID) + len(a.TaskGroup) + 2
	if l <= 3 || l <= prefix {
		return uint(0)
	}

	// Strip the trailing ']' and parse; a malformed name yields index 0
	// because the Atoi error is deliberately ignored.
	strNum := a.Name[prefix : len(a.Name)-1]
	num, _ := strconv.Atoi(strNum)
	return uint(num)
}

// Copy provides a copy of the allocation and deep copies the job
func (a *Allocation) Copy() *Allocation {
	return a.copyImpl(true)
}

// CopySkipJob provides a copy of the allocation but doesn't deep copy the job
func (a *Allocation) CopySkipJob() *Allocation {
	return a.copyImpl(false)
}

// Canonicalize Allocation to ensure fields are initialized to the expectations
// of this version of Nomad. Should be called when restoring persisted
// Allocations or receiving Allocations from Nomad agents potentially on an
// older version of Nomad.
func (a *Allocation) Canonicalize() {
	// Back-fill AllocatedResources from the deprecated TaskResources /
	// SharedResources fields (see the struct's COMPAT comments) so older
	// persisted allocations look like current ones.
	if a.AllocatedResources == nil && a.TaskResources != nil {
		ar := AllocatedResources{}

		tasks := make(map[string]*AllocatedTaskResources, len(a.TaskResources))
		for name, tr := range a.TaskResources {
			atr := AllocatedTaskResources{}
			atr.Cpu.CpuShares = int64(tr.CPU)
			atr.Memory.MemoryMB = int64(tr.MemoryMB)
			atr.Networks = tr.Networks.Copy()

			tasks[name] = &atr
		}
		ar.Tasks = tasks

		if a.SharedResources != nil {
			ar.Shared.DiskMB = int64(a.SharedResources.DiskMB)
			ar.Shared.Networks = a.SharedResources.Networks.Copy()
		}

		a.AllocatedResources = &ar
	}

	// NOTE(review): assumes a.Job is non-nil — confirm callers.
	a.Job.Canonicalize()
}

// copyImpl returns a deep copy of the allocation; the job flag controls
// whether the embedded Job is deep-copied too (Copy) or shared (CopySkipJob).
func (a *Allocation) copyImpl(job bool) *Allocation {
	if a == nil {
		return nil
	}
	// Start from a shallow copy, then replace every reference-typed field
	// with its own copy so the two allocations share no mutable state
	// (except Job when job == false).
	na := new(Allocation)
	*na = *a

	if job {
		na.Job = na.Job.Copy()
	}

	na.AllocatedResources = na.AllocatedResources.Copy()
	na.Resources = na.Resources.Copy()
	na.SharedResources = na.SharedResources.Copy()

	if a.TaskResources != nil {
		tr := make(map[string]*Resources, len(na.TaskResources))
		for task, resource := range na.TaskResources {
			tr[task] = resource.Copy()
		}
		na.TaskResources = tr
	}

	na.Metrics = na.Metrics.Copy()
	na.DeploymentStatus = na.DeploymentStatus.Copy()

	if a.TaskStates != nil {
		ts := make(map[string]*TaskState, len(na.TaskStates))
		for task, state := range na.TaskStates {
			ts[task] = state.Copy()
		}
		na.TaskStates = ts
	}

	na.RescheduleTracker = a.RescheduleTracker.Copy()
	na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations)
	return na
}

// TerminalStatus returns if the desired or actual status is terminal and
// will no longer transition.
9321func (a *Allocation) TerminalStatus() bool { 9322 // First check the desired state and if that isn't terminal, check client 9323 // state. 9324 return a.ServerTerminalStatus() || a.ClientTerminalStatus() 9325} 9326 9327// ServerTerminalStatus returns true if the desired state of the allocation is terminal 9328func (a *Allocation) ServerTerminalStatus() bool { 9329 switch a.DesiredStatus { 9330 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 9331 return true 9332 default: 9333 return false 9334 } 9335} 9336 9337// ClientTerminalStatus returns if the client status is terminal and will no longer transition 9338func (a *Allocation) ClientTerminalStatus() bool { 9339 switch a.ClientStatus { 9340 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 9341 return true 9342 default: 9343 return false 9344 } 9345} 9346 9347// ShouldReschedule returns if the allocation is eligible to be rescheduled according 9348// to its status and ReschedulePolicy given its failure time 9349func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 9350 // First check the desired state 9351 switch a.DesiredStatus { 9352 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 9353 return false 9354 default: 9355 } 9356 switch a.ClientStatus { 9357 case AllocClientStatusFailed: 9358 return a.RescheduleEligible(reschedulePolicy, failTime) 9359 default: 9360 return false 9361 } 9362} 9363 9364// RescheduleEligible returns if the allocation is eligible to be rescheduled according 9365// to its ReschedulePolicy and the current state of its reschedule trackers 9366func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 9367 if reschedulePolicy == nil { 9368 return false 9369 } 9370 attempts := reschedulePolicy.Attempts 9371 interval := reschedulePolicy.Interval 9372 enabled := attempts > 0 || reschedulePolicy.Unlimited 9373 if !enabled { 9374 return false 9375 } 9376 if 
	reschedulePolicy.Unlimited {
		return true
	}
	// Early return true if there are no attempts yet and the number of allowed attempts is > 0
	if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
		return true
	}
	// Count attempts that fall within the policy interval, newest first.
	// The tracker is guaranteed non-nil here by the early return above.
	attempted := 0
	for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
		lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
		timeDiff := failTime.UTC().UnixNano() - lastAttempt
		if timeDiff < interval.Nanoseconds() {
			attempted += 1
		}
	}
	return attempted < attempts
}

// LastEventTime is the time of the last task event in the allocation.
// It is used to determine allocation failure time. If the FinishedAt field
// is not set, the alloc's modify time is used
func (a *Allocation) LastEventTime() time.Time {
	var lastEventTime time.Time
	if a.TaskStates != nil {
		for _, s := range a.TaskStates {
			if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) {
				lastEventTime = s.FinishedAt
			}
		}
	}

	if lastEventTime.IsZero() {
		return time.Unix(0, a.ModifyTime).UTC()
	}
	return lastEventTime
}

// ReschedulePolicy returns the reschedule policy based on the task group.
// NOTE(review): assumes a.Job is non-nil — confirm callers.
func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}
	return tg.ReschedulePolicy
}

// MigrateStrategy returns the migrate strategy based on the task group
func (a *Allocation) MigrateStrategy() *MigrateStrategy {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}
	return tg.Migrate
}

// NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
// and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules
func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
	failTime := a.LastEventTime()
	reschedulePolicy := a.ReschedulePolicy()
	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
		return time.Time{}, false
	}

	nextDelay := a.NextDelay()
	nextRescheduleTime := failTime.Add(nextDelay)
	rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil)
	if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil {
		// Check for eligibility based on the interval if max attempts is set.
		// NOTE(review): this in-interval counting loop duplicates the one in
		// RescheduleEligible — candidate for a shared helper.
		attempted := 0
		for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
			lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
			timeDiff := failTime.UTC().UnixNano() - lastAttempt
			if timeDiff < reschedulePolicy.Interval.Nanoseconds() {
				attempted += 1
			}
		}
		rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval
	}
	return nextRescheduleTime, rescheduleEligible
}

// ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration
func (a *Allocation) ShouldClientStop() bool {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil ||
		tg.StopAfterClientDisconnect == nil ||
		*tg.StopAfterClientDisconnect == 0*time.Nanosecond {
		return false
	}
	return true
}

// WaitClientStop uses the reschedule delay mechanism to block rescheduling until
// StopAfterClientDisconnect's block interval passes
func (a *Allocation) WaitClientStop() time.Time {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)

	// An alloc can only be marked lost once, so use the first lost transition
	var t time.Time
	for _, s := range a.AllocStates {
		if s.Field == AllocStateFieldClientStatus &&
			s.Value ==
AllocClientStatusLost { 9479 t = s.Time 9480 break 9481 } 9482 } 9483 9484 // On the first pass, the alloc hasn't been marked lost yet, and so we start 9485 // counting from now 9486 if t.IsZero() { 9487 t = time.Now().UTC() 9488 } 9489 9490 // Find the max kill timeout 9491 kill := DefaultKillTimeout 9492 for _, t := range tg.Tasks { 9493 if t.KillTimeout > kill { 9494 kill = t.KillTimeout 9495 } 9496 } 9497 9498 return t.Add(*tg.StopAfterClientDisconnect + kill) 9499} 9500 9501// NextDelay returns a duration after which the allocation can be rescheduled. 9502// It is calculated according to the delay function and previous reschedule attempts. 9503func (a *Allocation) NextDelay() time.Duration { 9504 policy := a.ReschedulePolicy() 9505 // Can be nil if the task group was updated to remove its reschedule policy 9506 if policy == nil { 9507 return 0 9508 } 9509 delayDur := policy.Delay 9510 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { 9511 return delayDur 9512 } 9513 events := a.RescheduleTracker.Events 9514 switch policy.DelayFunction { 9515 case "exponential": 9516 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 9517 case "fibonacci": 9518 if len(events) >= 2 { 9519 fibN1Delay := events[len(events)-1].Delay 9520 fibN2Delay := events[len(events)-2].Delay 9521 // Handle reset of delay ceiling which should cause 9522 // a new series to start 9523 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { 9524 delayDur = fibN1Delay 9525 } else { 9526 delayDur = fibN1Delay + fibN2Delay 9527 } 9528 } 9529 default: 9530 return delayDur 9531 } 9532 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { 9533 delayDur = policy.MaxDelay 9534 // check if delay needs to be reset 9535 9536 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 9537 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 9538 if 
timeDiff > delayDur.Nanoseconds() { 9539 delayDur = policy.Delay 9540 } 9541 9542 } 9543 9544 return delayDur 9545} 9546 9547// Terminated returns if the allocation is in a terminal state on a client. 9548func (a *Allocation) Terminated() bool { 9549 if a.ClientStatus == AllocClientStatusFailed || 9550 a.ClientStatus == AllocClientStatusComplete || 9551 a.ClientStatus == AllocClientStatusLost { 9552 return true 9553 } 9554 return false 9555} 9556 9557// SetStopped updates the allocation in place to a DesiredStatus stop, with the ClientStatus 9558func (a *Allocation) SetStop(clientStatus, clientDesc string) { 9559 a.DesiredStatus = AllocDesiredStatusStop 9560 a.ClientStatus = clientStatus 9561 a.ClientDescription = clientDesc 9562 a.AppendState(AllocStateFieldClientStatus, clientStatus) 9563} 9564 9565// AppendState creates and appends an AllocState entry recording the time of the state 9566// transition. Used to mark the transition to lost 9567func (a *Allocation) AppendState(field AllocStateField, value string) { 9568 a.AllocStates = append(a.AllocStates, &AllocState{ 9569 Field: field, 9570 Value: value, 9571 Time: time.Now().UTC(), 9572 }) 9573} 9574 9575// RanSuccessfully returns whether the client has ran the allocation and all 9576// tasks finished successfully. Critically this function returns whether the 9577// allocation has ran to completion and not just that the alloc has converged to 9578// its desired state. That is to say that a batch allocation must have finished 9579// with exit code 0 on all task groups. This doesn't really have meaning on a 9580// non-batch allocation because a service and system allocation should not 9581// finish. 9582func (a *Allocation) RanSuccessfully() bool { 9583 // Handle the case the client hasn't started the allocation. 
	if len(a.TaskStates) == 0 {
		return false
	}

	// Check to see if all the tasks finished successfully in the allocation
	allSuccess := true
	for _, state := range a.TaskStates {
		allSuccess = allSuccess && state.Successful()
	}

	return allSuccess
}

// ShouldMigrate returns if the allocation needs data migration
func (a *Allocation) ShouldMigrate() bool {
	// No previous allocation means there is nothing to migrate from.
	if a.PreviousAllocation == "" {
		return false
	}

	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
		return false
	}

	tg := a.Job.LookupTaskGroup(a.TaskGroup)

	// if the task group is nil or the ephemeral disk block isn't present then
	// we won't migrate
	if tg == nil || tg.EphemeralDisk == nil {
		return false
	}

	// We won't migrate any data if the user hasn't enabled migration or the
	// disk is not marked as sticky
	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
		return false
	}

	return true
}

// SetEventDisplayMessages populates the display message if it's not already set,
// a temporary fix to handle old allocations that don't have it.
// This method will be removed in a future release.
func (a *Allocation) SetEventDisplayMessages() {
	setDisplayMsg(a.TaskStates)
}

// COMPAT(0.11): Remove in 0.11
// ComparableResources returns the resources on the allocation
// handling upgrade paths. After 0.11 calls to this should be replaced with:
// alloc.AllocatedResources.Comparable()
func (a *Allocation) ComparableResources() *ComparableResources {
	// Alloc already has 0.9+ behavior
	if a.AllocatedResources != nil {
		return a.AllocatedResources.Comparable()
	}

	var resources *Resources
	if a.Resources != nil {
		resources = a.Resources
	} else if a.TaskResources != nil {
		resources = new(Resources)
		resources.Add(a.SharedResources)
		for _, taskResource := range a.TaskResources {
			resources.Add(taskResource)
		}
	}

	// Upgrade path
	// NOTE(review): if both Resources and TaskResources are nil here,
	// resources stays nil and the field accesses below panic — presumably
	// pre-0.9 allocations always carry Resources; confirm.
	return &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: int64(resources.CPU),
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: int64(resources.MemoryMB),
			},
			Networks: resources.Networks,
		},
		Shared: AllocatedSharedResources{
			DiskMB: int64(resources.DiskMB),
		},
	}
}

// LookupTask by name from the Allocation. Returns nil if the Job is not set, the
// TaskGroup does not exist, or the task name cannot be found.
func (a *Allocation) LookupTask(name string) *Task {
	if a.Job == nil {
		return nil
	}

	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}

	return tg.LookupTask(name)
}

// Stub returns a list stub for the allocation
// NOTE(review): unlike LookupTask above, this dereferences a.Job (JobType,
// JobVersion) without a nil check — callers presumably guarantee Job is set;
// confirm.
func (a *Allocation) Stub(fields *AllocStubFields) *AllocListStub {
	s := &AllocListStub{
		ID:                    a.ID,
		EvalID:                a.EvalID,
		Name:                  a.Name,
		Namespace:             a.Namespace,
		NodeID:                a.NodeID,
		NodeName:              a.NodeName,
		JobID:                 a.JobID,
		JobType:               a.Job.Type,
		JobVersion:            a.Job.Version,
		TaskGroup:             a.TaskGroup,
		DesiredStatus:         a.DesiredStatus,
		DesiredDescription:    a.DesiredDescription,
		ClientStatus:          a.ClientStatus,
		ClientDescription:     a.ClientDescription,
		DesiredTransition:     a.DesiredTransition,
		TaskStates:            a.TaskStates,
		DeploymentStatus:      a.DeploymentStatus,
		FollowupEvalID:        a.FollowupEvalID,
		RescheduleTracker:     a.RescheduleTracker,
		PreemptedAllocations:  a.PreemptedAllocations,
		PreemptedByAllocation: a.PreemptedByAllocation,
		CreateIndex:           a.CreateIndex,
		ModifyIndex:           a.ModifyIndex,
		CreateTime:            a.CreateTime,
		ModifyTime:            a.ModifyTime,
	}

	// Apply the optional field selection: resources are opt-in, task states
	// are opt-out (retained unless explicitly disabled).
	if fields != nil {
		if fields.Resources {
			s.AllocatedResources = a.AllocatedResources
		}
		if !fields.TaskStates {
			s.TaskStates = nil
		}
	}

	return s
}

// AllocationDiff converts an Allocation type to an AllocationDiff type
// If at any time, modification are made to AllocationDiff so that an
// Allocation can no longer be safely converted to AllocationDiff,
// this method should be changed accordingly.
func (a *Allocation) AllocationDiff() *AllocationDiff {
	return (*AllocationDiff)(a)
}

// AllocationDiff is another named type for Allocation (to use the same fields),
// which is used to represent the delta for an Allocation. If you need a method
// defined on the AllocationDiff, consider whether it belongs on Allocation
// instead, since the two types convert freely.
type AllocationDiff Allocation

// AllocListStub is used to return a subset of alloc information
type AllocListStub struct {
	ID                    string
	EvalID                string
	Name                  string
	Namespace             string
	NodeID                string
	NodeName              string
	JobID                 string
	JobType               string
	JobVersion            uint64
	TaskGroup             string
	AllocatedResources    *AllocatedResources `json:",omitempty"`
	DesiredStatus         string
	DesiredDescription    string
	ClientStatus          string
	ClientDescription     string
	DesiredTransition     DesiredTransition
	TaskStates            map[string]*TaskState
	DeploymentStatus      *AllocDeploymentStatus
	FollowupEvalID        string
	RescheduleTracker     *RescheduleTracker
	PreemptedAllocations  []string
	PreemptedByAllocation string
	CreateIndex           uint64
	ModifyIndex           uint64
	CreateTime            int64
	ModifyTime            int64
}

// SetEventDisplayMessages populates the display message if it's not already set,
// a temporary fix to handle old allocations that don't have it.
// This method will be removed in a future release.
func (a *AllocListStub) SetEventDisplayMessages() {
	setDisplayMsg(a.TaskStates)
}

// setDisplayMsg backfills the human-readable display message on every task
// event in the given task states.
func setDisplayMsg(taskStates map[string]*TaskState) {
	for _, taskState := range taskStates {
		for _, event := range taskState.Events {
			event.PopulateEventDisplayMessage()
		}
	}
}

// AllocStubFields defines which fields are included in the AllocListStub.
type AllocStubFields struct {
	// Resources includes resource-related fields if true.
	Resources bool

	// TaskStates removes the TaskStates field if false (default is to
	// include TaskStates).
	TaskStates bool
}

// NewAllocStubFields returns the default field selection for an AllocListStub.
func NewAllocStubFields() *AllocStubFields {
	return &AllocStubFields{
		// Maintain backward compatibility by retaining task states by
		// default.
9798 TaskStates: true, 9799 } 9800} 9801 9802// AllocMetric is used to track various metrics while attempting 9803// to make an allocation. These are used to debug a job, or to better 9804// understand the pressure within the system. 9805type AllocMetric struct { 9806 // NodesEvaluated is the number of nodes that were evaluated 9807 NodesEvaluated int 9808 9809 // NodesFiltered is the number of nodes filtered due to a constraint 9810 NodesFiltered int 9811 9812 // NodesAvailable is the number of nodes available for evaluation per DC. 9813 NodesAvailable map[string]int 9814 9815 // ClassFiltered is the number of nodes filtered by class 9816 ClassFiltered map[string]int 9817 9818 // ConstraintFiltered is the number of failures caused by constraint 9819 ConstraintFiltered map[string]int 9820 9821 // NodesExhausted is the number of nodes skipped due to being 9822 // exhausted of at least one resource 9823 NodesExhausted int 9824 9825 // ClassExhausted is the number of nodes exhausted by class 9826 ClassExhausted map[string]int 9827 9828 // DimensionExhausted provides the count by dimension or reason 9829 DimensionExhausted map[string]int 9830 9831 // QuotaExhausted provides the exhausted dimensions 9832 QuotaExhausted []string 9833 9834 // ResourcesExhausted provides the amount of resources exhausted by task 9835 // during the allocation placement 9836 ResourcesExhausted map[string]*Resources 9837 9838 // Scores is the scores of the final few nodes remaining 9839 // for placement. The top score is typically selected. 9840 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 9841 Scores map[string]float64 9842 9843 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 9844 ScoreMetaData []*NodeScoreMeta 9845 9846 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after 9847 // we receive normalized score during the last step of the scoring stack. 
	nodeScoreMeta *NodeScoreMeta

	// topScores is used to maintain a heap of the top K nodes with
	// the highest normalized score
	topScores *kheap.ScoreHeap

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}

// Copy returns a copy of the metric with the map- and slice-valued fields
// duplicated. The unexported scoring scratch fields are shared via the
// shallow struct copy.
func (a *AllocMetric) Copy() *AllocMetric {
	if a == nil {
		return nil
	}
	na := new(AllocMetric)
	*na = *a
	na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable)
	na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered)
	na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered)
	na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted)
	na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted)
	na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted)
	na.Scores = helper.CopyMapStringFloat64(na.Scores)
	na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData)
	return na
}

// EvaluateNode increments the count of evaluated nodes.
func (a *AllocMetric) EvaluateNode() {
	a.NodesEvaluated += 1
}

// FilterNode records that a node was filtered, bucketing the count by node
// class and by the offending constraint when either is provided.
func (a *AllocMetric) FilterNode(node *Node, constraint string) {
	a.NodesFiltered += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassFiltered == nil {
			a.ClassFiltered = make(map[string]int)
		}
		a.ClassFiltered[node.NodeClass] += 1
	}
	if constraint != "" {
		if a.ConstraintFiltered == nil {
			a.ConstraintFiltered = make(map[string]int)
		}
		a.ConstraintFiltered[constraint] += 1
	}
}

// ExhaustedNode records that a node was resource-exhausted, bucketing the
// count by node class and by the exhausted dimension when provided.
func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
	a.NodesExhausted += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassExhausted == nil {
			a.ClassExhausted = make(map[string]int)
		}
		a.ClassExhausted[node.NodeClass] += 1
	}
	if dimension != "" {
		if a.DimensionExhausted == nil {
			a.DimensionExhausted = make(map[string]int)
		}
		a.DimensionExhausted[dimension] += 1
	}
}

// ExhaustQuota appends the given quota dimensions to the exhausted set.
func (a *AllocMetric) ExhaustQuota(dimensions []string) {
	if a.QuotaExhausted == nil {
		a.QuotaExhausted = make([]string, 0, len(dimensions))
	}

	a.QuotaExhausted = append(a.QuotaExhausted, dimensions...)
}

// ExhaustResources updates the amount of resources exhausted for the
// allocation because of the given task group.
func (a *AllocMetric) ExhaustResources(tg *TaskGroup) {
	// Nothing to attribute when no dimension has been recorded as exhausted.
	if a.DimensionExhausted == nil {
		return
	}

	if a.ResourcesExhausted == nil {
		a.ResourcesExhausted = make(map[string]*Resources)
	}

	for _, t := range tg.Tasks {
		exhaustedResources := a.ResourcesExhausted[t.Name]
		if exhaustedResources == nil {
			exhaustedResources = &Resources{}
		}

		if a.DimensionExhausted["memory"] > 0 {
			exhaustedResources.MemoryMB += t.Resources.MemoryMB
		}

		if a.DimensionExhausted["cpu"] > 0 {
			exhaustedResources.CPU += t.Resources.CPU
		}

		a.ResourcesExhausted[t.Name] = exhaustedResources
	}
}

// ScoreNode is used to gather top K scoring nodes in a heap
func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
	// Create nodeScoreMeta lazily if it's the first time or if it's a new node
	if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID {
		a.nodeScoreMeta = &NodeScoreMeta{
			NodeID: node.ID,
			Scores: make(map[string]float64),
		}
	}
	if name == NormScorerName {
		a.nodeScoreMeta.NormScore = score
		// Once we have the normalized score we can push to the heap
		// that tracks top K by normalized score

		// Create the heap if it's not there already
		if a.topScores == nil {
			a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores)
		}
		heap.Push(a.topScores, a.nodeScoreMeta)

		// Clear out this entry because it's now in the heap
		a.nodeScoreMeta = nil
	} else {
		a.nodeScoreMeta.Scores[name] = score
	}
}

// PopulateScoreMetaData populates a map of scorer to scoring metadata
// The map is populated by popping elements from a heap of top K scores
// maintained per scorer
// NOTE(review): assumes ScoreMetaData is nil or already sized to hold
// topScores.Len() entries; a shorter pre-existing slice would index out of
// range — confirm callers invoke this only once per metric.
func (a *AllocMetric) PopulateScoreMetaData() {
	if a.topScores == nil {
		return
	}

	if a.ScoreMetaData == nil {
		a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len())
	}
	heapItems := a.topScores.GetItemsReverse()
	for i, item := range heapItems {
		a.ScoreMetaData[i] = item.(*NodeScoreMeta)
	}
}

// NodeScoreMeta captures scoring meta data derived from
// different scoring factors.
type NodeScoreMeta struct {
	NodeID    string
	Scores    map[string]float64
	NormScore float64
}

// Copy returns a shallow copy of the score metadata; the Scores map is
// shared with the original.
func (s *NodeScoreMeta) Copy() *NodeScoreMeta {
	if s == nil {
		return nil
	}
	ns := new(NodeScoreMeta)
	*ns = *s
	return ns
}

// String renders the node ID, normalized score and per-scorer scores.
func (s *NodeScoreMeta) String() string {
	return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores)
}

// Score returns the normalized score for heap ordering.
func (s *NodeScoreMeta) Score() float64 {
	return s.NormScore
}

// Data returns the item itself for retrieval from the heap.
func (s *NodeScoreMeta) Data() interface{} {
	return s
}

// AllocNetworkStatus captures the status of an allocation's network during runtime.
// Depending on the network mode, an allocation's address may need to be known to other
// systems in Nomad such as service registration.
10031type AllocNetworkStatus struct { 10032 InterfaceName string 10033 Address string 10034 DNS *DNSConfig 10035} 10036 10037func (a *AllocNetworkStatus) Copy() *AllocNetworkStatus { 10038 if a == nil { 10039 return nil 10040 } 10041 return &AllocNetworkStatus{ 10042 InterfaceName: a.InterfaceName, 10043 Address: a.Address, 10044 DNS: a.DNS.Copy(), 10045 } 10046} 10047 10048// AllocDeploymentStatus captures the status of the allocation as part of the 10049// deployment. This can include things like if the allocation has been marked as 10050// healthy. 10051type AllocDeploymentStatus struct { 10052 // Healthy marks whether the allocation has been marked healthy or unhealthy 10053 // as part of a deployment. It can be unset if it has neither been marked 10054 // healthy or unhealthy. 10055 Healthy *bool 10056 10057 // Timestamp is the time at which the health status was set. 10058 Timestamp time.Time 10059 10060 // Canary marks whether the allocation is a canary or not. A canary that has 10061 // been promoted will have this field set to false. 10062 Canary bool 10063 10064 // ModifyIndex is the raft index in which the deployment status was last 10065 // changed. 10066 ModifyIndex uint64 10067} 10068 10069// HasHealth returns true if the allocation has its health set. 
func (a *AllocDeploymentStatus) HasHealth() bool {
	return a != nil && a.Healthy != nil
}

// IsHealthy returns if the allocation is marked as healthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsHealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && *a.Healthy
}

// IsUnhealthy returns if the allocation is marked as unhealthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsUnhealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && !*a.Healthy
}

// IsCanary returns if the allocation is marked as a canary
func (a *AllocDeploymentStatus) IsCanary() bool {
	if a == nil {
		return false
	}

	return a.Canary
}

// Copy returns a copy of the deployment status; the Healthy pointer is
// duplicated so the copy can be mutated independently.
func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
	if a == nil {
		return nil
	}

	c := new(AllocDeploymentStatus)
	*c = *a

	if a.Healthy != nil {
		c.Healthy = helper.BoolToPtr(*a.Healthy)
	}

	return c
}

// Evaluation status values.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)

// Reasons an evaluation may be triggered.
const (
	EvalTriggerJobRegister       = "job-register"
	EvalTriggerJobDeregister     = "job-deregister"
	EvalTriggerPeriodicJob       = "periodic-job"
	EvalTriggerNodeDrain         = "node-drain"
	EvalTriggerNodeUpdate        = "node-update"
	EvalTriggerAllocStop         = "alloc-stop"
	EvalTriggerScheduled         = "scheduled"
	EvalTriggerRollingUpdate     = "rolling-update"
	EvalTriggerDeploymentWatcher = "deployment-watcher"
	EvalTriggerFailedFollowUp    = "failed-follow-up"
	EvalTriggerMaxPlans          = "max-plan-attempts"
	EvalTriggerRetryFailedAlloc  = "alloc-failure"
	EvalTriggerQueuedAllocs      = "queued-allocs"
	EvalTriggerPreemption        = "preemption"
	EvalTriggerScaling           = "job-scaling"
)

const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobCSIVolumeClaimGC is used for the garbage collection of CSI
	// volume claims. We periodically scan volumes to see if no allocs are
	// claiming them. If so, we unclaim the volume.
	CoreJobCSIVolumeClaimGC = "csi-volume-claim-gc"

	// CoreJobCSIPluginGC is used for the garbage collection of CSI plugins.
	// We periodically scan plugins to see if they have no associated volumes
	// or allocs running them. If so, we delete the plugin.
	CoreJobCSIPluginGC = "csi-plugin-gc"

	// CoreJobOneTimeTokenGC is used for the garbage collection of one-time
	// tokens. We periodically scan for expired tokens and delete them.
	CoreJobOneTimeTokenGC = "one-time-token-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)

// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Namespace is the namespace the evaluation is created in
	Namespace string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade in versions prior to 0.7.0
	// Deprecated
	Wait time.Duration

	// WaitUntil is the time when this eval should be run. This is used to
	// support delayed rescheduling of failed allocations
	WaitUntil time.Time

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades and failed-follow-up evals, where
	// we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades and failed-follow-up evals, where
	// we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// QuotaLimitReached marks whether a quota limit was reached for the
	// evaluation.
	QuotaLimitReached string

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// LeaderACL provides the ACL token to use when issuing RPCs back to the
	// leader. This will be a valid management token as long as the leader is
	// active. This should not ever be exposed via the API.
	LeaderACL string

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. The index will either be set when it has gone through the
	// scheduler or if a blocked evaluation is being created. The index is set
	// in this case so we can determine if an early unblocking is required since
	// capacity has changed since the evaluation was created. This can result in
	// the SnapshotIndex being less than the CreateIndex.
	SnapshotIndex uint64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// Unix nanosecond timestamps for creation and last modification.
	CreateTime int64
	ModifyTime int64
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
10309func (e *Evaluation) TerminalStatus() bool { 10310 switch e.Status { 10311 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 10312 return true 10313 default: 10314 return false 10315 } 10316} 10317 10318func (e *Evaluation) GoString() string { 10319 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 10320} 10321 10322func (e *Evaluation) Copy() *Evaluation { 10323 if e == nil { 10324 return nil 10325 } 10326 ne := new(Evaluation) 10327 *ne = *e 10328 10329 // Copy ClassEligibility 10330 if e.ClassEligibility != nil { 10331 classes := make(map[string]bool, len(e.ClassEligibility)) 10332 for class, elig := range e.ClassEligibility { 10333 classes[class] = elig 10334 } 10335 ne.ClassEligibility = classes 10336 } 10337 10338 // Copy FailedTGAllocs 10339 if e.FailedTGAllocs != nil { 10340 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 10341 for tg, metric := range e.FailedTGAllocs { 10342 failedTGs[tg] = metric.Copy() 10343 } 10344 ne.FailedTGAllocs = failedTGs 10345 } 10346 10347 // Copy queued allocations 10348 if e.QueuedAllocations != nil { 10349 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 10350 for tg, num := range e.QueuedAllocations { 10351 queuedAllocations[tg] = num 10352 } 10353 ne.QueuedAllocations = queuedAllocations 10354 } 10355 10356 return ne 10357} 10358 10359// ShouldEnqueue checks if a given evaluation should be enqueued into the 10360// eval_broker 10361func (e *Evaluation) ShouldEnqueue() bool { 10362 switch e.Status { 10363 case EvalStatusPending: 10364 return true 10365 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 10366 return false 10367 default: 10368 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 10369 } 10370} 10371 10372// ShouldBlock checks if a given evaluation should be entered into the blocked 10373// eval tracker. 
10374func (e *Evaluation) ShouldBlock() bool { 10375 switch e.Status { 10376 case EvalStatusBlocked: 10377 return true 10378 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 10379 return false 10380 default: 10381 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 10382 } 10383} 10384 10385// MakePlan is used to make a plan from the given evaluation 10386// for a given Job 10387func (e *Evaluation) MakePlan(j *Job) *Plan { 10388 p := &Plan{ 10389 EvalID: e.ID, 10390 Priority: e.Priority, 10391 Job: j, 10392 NodeUpdate: make(map[string][]*Allocation), 10393 NodeAllocation: make(map[string][]*Allocation), 10394 NodePreemptions: make(map[string][]*Allocation), 10395 } 10396 if j != nil { 10397 p.AllAtOnce = j.AllAtOnce 10398 } 10399 return p 10400} 10401 10402// NextRollingEval creates an evaluation to followup this eval for rolling updates 10403func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 10404 now := time.Now().UTC().UnixNano() 10405 return &Evaluation{ 10406 ID: uuid.Generate(), 10407 Namespace: e.Namespace, 10408 Priority: e.Priority, 10409 Type: e.Type, 10410 TriggeredBy: EvalTriggerRollingUpdate, 10411 JobID: e.JobID, 10412 JobModifyIndex: e.JobModifyIndex, 10413 Status: EvalStatusPending, 10414 Wait: wait, 10415 PreviousEval: e.ID, 10416 CreateTime: now, 10417 ModifyTime: now, 10418 } 10419} 10420 10421// CreateBlockedEval creates a blocked evaluation to followup this eval to place any 10422// failed allocations. It takes the classes marked explicitly eligible or 10423// ineligible, whether the job has escaped computed node classes and whether the 10424// quota limit was reached. 
func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool,
	escaped bool, quotaReached string, failedTGAllocs map[string]*AllocMetric) *Evaluation {
	now := time.Now().UTC().UnixNano()
	// The blocked eval inherits the priority, type and job linkage of this
	// eval so that it is scheduled with the same characteristics once
	// unblocked.
	return &Evaluation{
		ID:                   uuid.Generate(),
		Namespace:            e.Namespace,
		Priority:             e.Priority,
		Type:                 e.Type,
		TriggeredBy:          EvalTriggerQueuedAllocs,
		JobID:                e.JobID,
		JobModifyIndex:       e.JobModifyIndex,
		Status:               EvalStatusBlocked,
		PreviousEval:         e.ID,
		FailedTGAllocs:       failedTGAllocs,
		ClassEligibility:     classEligibility,
		EscapedComputedClass: escaped,
		QuotaLimitReached:    quotaReached,
		CreateTime:           now,
		ModifyTime:           now,
	}
}

// CreateFailedFollowUpEval creates a follow up evaluation when the current one
// has been marked as failed because it has hit the delivery limit and will not
// be retried by the eval_broker. Callers should copy the created eval's ID
// into the old eval's NextEval field.
func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
	now := time.Now().UTC().UnixNano()
	// The follow-up is a fresh pending eval that waits before being
	// dequeued, giving the cluster time to recover from whatever caused
	// the delivery failures.
	return &Evaluation{
		ID:             uuid.Generate(),
		Namespace:      e.Namespace,
		Priority:       e.Priority,
		Type:           e.Type,
		TriggeredBy:    EvalTriggerFailedFollowUp,
		JobID:          e.JobID,
		JobModifyIndex: e.JobModifyIndex,
		Status:         EvalStatusPending,
		Wait:           wait,
		PreviousEval:   e.ID,
		CreateTime:     now,
		ModifyTime:     now,
	}
}

// UpdateModifyTime takes into account that clocks on different servers may be
// slightly out of sync. Even in case of a leader change, this method will
// guarantee that ModifyTime will always be after CreateTime.
func (e *Evaluation) UpdateModifyTime() {
	now := time.Now().UTC().UnixNano()
	// If the local clock is at or behind CreateTime, nudge ModifyTime one
	// nanosecond past CreateTime so the ordering invariant holds.
	if now <= e.CreateTime {
		e.ModifyTime = e.CreateTime + 1
	} else {
		e.ModifyTime = now
	}
}

// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as evicted.
	NodePreemptions map[string][]*Allocation

	// SnapshotIndex is the Raft index of the snapshot used to create the
	// Plan. The leader will wait to evaluate the plan until its StateStore
	// has reached at least this index.
	SnapshotIndex uint64
}

// AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of the
// allocation may be optionally set by passing in a non-empty value.
func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus, followupEvalID string) {
	// Work on a shallow copy so the caller's allocation is not mutated.
	newAlloc := new(Allocation)
	*newAlloc = *alloc

	// If the job is not set in the plan we are deregistering a job so we
	// extract the job from the allocation.
	if p.Job == nil && newAlloc.Job != nil {
		p.Job = newAlloc.Job
	}

	// Normalize the job
	newAlloc.Job = nil

	// Strip the resources as it can be rebuilt.
	newAlloc.Resources = nil

	newAlloc.DesiredStatus = AllocDesiredStatusStop
	newAlloc.DesiredDescription = desiredDesc

	// Only overwrite the client status when the caller supplied one.
	if clientStatus != "" {
		newAlloc.ClientStatus = clientStatus
	}

	newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus)

	if followupEvalID != "" {
		newAlloc.FollowupEvalID = followupEvalID
	}

	// Record the stop under the allocation's node.
	node := alloc.NodeID
	existing := p.NodeUpdate[node]
	p.NodeUpdate[node] = append(existing, newAlloc)
}

// AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan.
// To minimize the size of the plan, this only sets a minimal set of fields in the allocation
func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) {
	newAlloc := &Allocation{}
	newAlloc.ID = alloc.ID
	newAlloc.JobID = alloc.JobID
	newAlloc.Namespace = alloc.Namespace
	newAlloc.DesiredStatus = AllocDesiredStatusEvict
	newAlloc.PreemptedByAllocation = preemptingAllocID

	desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID)
	newAlloc.DesiredDescription = desiredDesc

	// TaskResources are needed by the plan applier to check if allocations fit
	// after removing preempted allocations
	if alloc.AllocatedResources != nil {
		newAlloc.AllocatedResources = alloc.AllocatedResources
	} else {
		// COMPAT Remove in version 0.11
		newAlloc.TaskResources = alloc.TaskResources
		newAlloc.SharedResources = alloc.SharedResources
	}

	// Append this alloc to slice for this node
	node := alloc.NodeID
	existing := p.NodePreemptions[node]
	p.NodePreemptions[node] = append(existing, newAlloc)
}

// PopUpdate removes the most recently appended stop/evict entry for the
// allocation's node, but only if that entry is for this allocation. The
// node's slot is deleted entirely when no entries remain.
func (p *Plan) PopUpdate(alloc *Allocation) {
	existing := p.NodeUpdate[alloc.NodeID]
	n := len(existing)
	if n > 0 && existing[n-1].ID == alloc.ID {
		existing = existing[:n-1]
		if len(existing) > 0 {
			p.NodeUpdate[alloc.NodeID] = existing
		} else {
			delete(p.NodeUpdate, alloc.NodeID)
		}
	}
}

// AppendAlloc appends the alloc to the plan allocations.
// Uses the passed job if explicitly passed, otherwise
// it is assumed the alloc will use the plan Job version.
func (p *Plan) AppendAlloc(alloc *Allocation, job *Job) {
	node := alloc.NodeID
	existing := p.NodeAllocation[node]

	alloc.Job = job

	p.NodeAllocation[node] = append(existing, alloc)
}

// IsNoOp checks if this plan would do nothing
func (p *Plan) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 &&
		len(p.NodeAllocation) == 0 &&
		p.Deployment == nil &&
		len(p.DeploymentUpdates) == 0
}

// NormalizeAllocations normalizes allocations to remove fields that can
// be fetched from the MemDB instead of sending over the wire
func (p *Plan) NormalizeAllocations() {
	// Updated (stopped/evicted) allocations only need identity plus the
	// fields the plan applier writes back.
	for _, allocs := range p.NodeUpdate {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                 alloc.ID,
				DesiredDescription: alloc.DesiredDescription,
				ClientStatus:       alloc.ClientStatus,
				FollowupEvalID:     alloc.FollowupEvalID,
			}
		}
	}

	// Preempted allocations only need identity plus the preemptor's ID.
	for _, allocs := range p.NodePreemptions {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                    alloc.ID,
				PreemptedByAllocation: alloc.PreemptedByAllocation,
			}
		}
	}
}

// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as stopped.
	NodePreemptions map[string][]*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}

// IsNoOp checks if this plan result would do nothing
func (p *PlanResult) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
}

// FullCommit is used to check if all the allocations in a plan
// were committed as part of the result. Returns if there was
// a match, and the number of expected and actual allocations.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	// Compare, per node, how many allocations the plan asked for against
	// how many the result actually committed.
	for name, allocList := range plan.NodeAllocation {
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}

// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	DesiredTGUpdates map[string]*DesiredUpdates

	// PreemptedAllocs is the set of allocations to be preempted to make the placement successful.
	PreemptedAllocs []*AllocListStub
}

// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
	Canary            uint64
	Preemptions       uint64
}

func (d *DesiredUpdates) GoString() string {
	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
}

// MsgpackHandle is a shared handle for encoding/decoding of structs
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{}
	h.RawToString = true

	// maintain binary format from time prior to upgrading latest ugorji
	h.BasicHandle.TimeNotBuiltin = true

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))

	// only review struct codec tags
	h.TypeInfos = codec.NewTypeInfos([]string{"codec"})

	return h
}()

// Decode is used to decode a MsgPack encoded object
func Decode(buf []byte, out interface{}) error {
	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
}

// Encode is used to encode a MsgPack object with type prefix
func Encode(t MessageType, msg interface{}) ([]byte, error) {
	var buf bytes.Buffer
	// The first byte is the message type so Raft log consumers can
	// dispatch without decoding the payload.
	buf.WriteByte(uint8(t))
	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
	return buf.Bytes(), err
}

// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}

// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	Key string
}

// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal.
type RecoverableError struct {
	Err         string
	Recoverable bool
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
func NewRecoverableError(e error, recoverable bool) error {
	// Preserve nil-ness: wrapping nil must stay nil for callers that
	// compare against nil.
	if e == nil {
		return nil
	}

	return &RecoverableError{
		Err:         e.Error(),
		Recoverable: recoverable,
	}
}

// WrapRecoverable wraps an existing error in a new RecoverableError with a new
// message. If the error was recoverable before the returned error is as well;
// otherwise it is unrecoverable.
func WrapRecoverable(msg string, err error) error {
	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
}

// Error implements the error interface.
func (r *RecoverableError) Error() string {
	return r.Err
}

// IsRecoverable satisfies the Recoverable interface.
func (r *RecoverableError) IsRecoverable() bool {
	return r.Recoverable
}

// IsUnrecoverable returns true when the error is marked fatal.
func (r *RecoverableError) IsUnrecoverable() bool {
	return !r.Recoverable
}

// Recoverable is an interface for errors to implement to indicate whether or
// not they are fatal or recoverable.
type Recoverable interface {
	error
	IsRecoverable() bool
}

// IsRecoverable returns true if error is a RecoverableError with
// Recoverable=true. Otherwise false is returned.
func IsRecoverable(e error) bool {
	if re, ok := e.(Recoverable); ok {
		return re.IsRecoverable()
	}
	return false
}

// WrappedServerError wraps an error and satisfies
// both the Recoverable and the ServerSideError interfaces
type WrappedServerError struct {
	Err error
}

// NewWrappedServerError is used to create a wrapped server side error
func NewWrappedServerError(e error) error {
	return &WrappedServerError{
		Err: e,
	}
}

// IsRecoverable delegates to the wrapped error's recoverability.
func (r *WrappedServerError) IsRecoverable() bool {
	return IsRecoverable(r.Err)
}

// Error implements the error interface.
func (r *WrappedServerError) Error() string {
	return r.Err.Error()
}

// IsServerSide satisfies the ServerSideError interface.
func (r *WrappedServerError) IsServerSide() bool {
	return true
}

// ServerSideError is an interface for errors to implement to indicate
// errors occurring after the request makes it to a server
type ServerSideError interface {
	error
	IsServerSide() bool
}

// IsServerSide returns true if error is a wrapped
// server side error
func IsServerSide(e error) bool {
	if se, ok := e.(ServerSideError); ok {
		return se.IsServerSide()
10881 } 10882 return false 10883} 10884 10885// ACLPolicy is used to represent an ACL policy 10886type ACLPolicy struct { 10887 Name string // Unique name 10888 Description string // Human readable 10889 Rules string // HCL or JSON format 10890 RulesJSON *acl.Policy // Generated from Rules on read 10891 Hash []byte 10892 CreateIndex uint64 10893 ModifyIndex uint64 10894} 10895 10896// SetHash is used to compute and set the hash of the ACL policy 10897func (c *ACLPolicy) SetHash() []byte { 10898 // Initialize a 256bit Blake2 hash (32 bytes) 10899 hash, err := blake2b.New256(nil) 10900 if err != nil { 10901 panic(err) 10902 } 10903 10904 // Write all the user set fields 10905 _, _ = hash.Write([]byte(c.Name)) 10906 _, _ = hash.Write([]byte(c.Description)) 10907 _, _ = hash.Write([]byte(c.Rules)) 10908 10909 // Finalize the hash 10910 hashVal := hash.Sum(nil) 10911 10912 // Set and return the hash 10913 c.Hash = hashVal 10914 return hashVal 10915} 10916 10917func (a *ACLPolicy) Stub() *ACLPolicyListStub { 10918 return &ACLPolicyListStub{ 10919 Name: a.Name, 10920 Description: a.Description, 10921 Hash: a.Hash, 10922 CreateIndex: a.CreateIndex, 10923 ModifyIndex: a.ModifyIndex, 10924 } 10925} 10926 10927func (a *ACLPolicy) Validate() error { 10928 var mErr multierror.Error 10929 if !validPolicyName.MatchString(a.Name) { 10930 err := fmt.Errorf("invalid name '%s'", a.Name) 10931 mErr.Errors = append(mErr.Errors, err) 10932 } 10933 if _, err := acl.Parse(a.Rules); err != nil { 10934 err = fmt.Errorf("failed to parse rules: %v", err) 10935 mErr.Errors = append(mErr.Errors, err) 10936 } 10937 if len(a.Description) > maxPolicyDescriptionLength { 10938 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 10939 mErr.Errors = append(mErr.Errors, err) 10940 } 10941 return mErr.ErrorOrNil() 10942} 10943 10944// ACLPolicyListStub is used to for listing ACL policies 10945type ACLPolicyListStub struct { 10946 Name string 10947 Description string 10948 Hash 
[]byte 10949 CreateIndex uint64 10950 ModifyIndex uint64 10951} 10952 10953// ACLPolicyListRequest is used to request a list of policies 10954type ACLPolicyListRequest struct { 10955 QueryOptions 10956} 10957 10958// ACLPolicySpecificRequest is used to query a specific policy 10959type ACLPolicySpecificRequest struct { 10960 Name string 10961 QueryOptions 10962} 10963 10964// ACLPolicySetRequest is used to query a set of policies 10965type ACLPolicySetRequest struct { 10966 Names []string 10967 QueryOptions 10968} 10969 10970// ACLPolicyListResponse is used for a list request 10971type ACLPolicyListResponse struct { 10972 Policies []*ACLPolicyListStub 10973 QueryMeta 10974} 10975 10976// SingleACLPolicyResponse is used to return a single policy 10977type SingleACLPolicyResponse struct { 10978 Policy *ACLPolicy 10979 QueryMeta 10980} 10981 10982// ACLPolicySetResponse is used to return a set of policies 10983type ACLPolicySetResponse struct { 10984 Policies map[string]*ACLPolicy 10985 QueryMeta 10986} 10987 10988// ACLPolicyDeleteRequest is used to delete a set of policies 10989type ACLPolicyDeleteRequest struct { 10990 Names []string 10991 WriteRequest 10992} 10993 10994// ACLPolicyUpsertRequest is used to upsert a set of policies 10995type ACLPolicyUpsertRequest struct { 10996 Policies []*ACLPolicy 10997 WriteRequest 10998} 10999 11000// ACLToken represents a client token which is used to Authenticate 11001type ACLToken struct { 11002 AccessorID string // Public Accessor ID (UUID) 11003 SecretID string // Secret ID, private (UUID) 11004 Name string // Human friendly name 11005 Type string // Client or Management 11006 Policies []string // Policies this token ties to 11007 Global bool // Global or Region local 11008 Hash []byte 11009 CreateTime time.Time // Time of creation 11010 CreateIndex uint64 11011 ModifyIndex uint64 11012} 11013 11014func (a *ACLToken) Copy() *ACLToken { 11015 c := new(ACLToken) 11016 *c = *a 11017 11018 c.Policies = make([]string, 
		len(a.Policies))
	copy(c.Policies, a.Policies)
	c.Hash = make([]byte, len(a.Hash))
	copy(c.Hash, a.Hash)

	return c
}

var (
	// AnonymousACLToken is used when no SecretID is provided, and the
	// request is made anonymously.
	AnonymousACLToken = &ACLToken{
		AccessorID: "anonymous",
		Name:       "Anonymous Token",
		Type:       ACLClientToken,
		Policies:   []string{"anonymous"},
		Global:     false,
	}
)

// ACLTokenListStub is a list-friendly view of an ACLToken that omits the
// private SecretID.
type ACLTokenListStub struct {
	AccessorID  string
	Name        string
	Type        string
	Policies    []string
	Global      bool
	Hash        []byte
	CreateTime  time.Time
	CreateIndex uint64
	ModifyIndex uint64
}

// SetHash is used to compute and set the hash of the ACL token
func (a *ACLToken) SetHash() []byte {
	// Initialize a 256bit Blake2 hash (32 bytes)
	hash, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields
	_, _ = hash.Write([]byte(a.Name))
	_, _ = hash.Write([]byte(a.Type))
	for _, policyName := range a.Policies {
		_, _ = hash.Write([]byte(policyName))
	}
	// Fold the scope into the hash so flipping Global changes it.
	if a.Global {
		_, _ = hash.Write([]byte("global"))
	} else {
		_, _ = hash.Write([]byte("local"))
	}

	// Finalize the hash
	hashVal := hash.Sum(nil)

	// Set and return the hash
	a.Hash = hashVal
	return hashVal
}

// Stub returns the list-friendly view of the token.
func (a *ACLToken) Stub() *ACLTokenListStub {
	return &ACLTokenListStub{
		AccessorID:  a.AccessorID,
		Name:        a.Name,
		Type:        a.Type,
		Policies:    a.Policies,
		Global:      a.Global,
		Hash:        a.Hash,
		CreateTime:  a.CreateTime,
		CreateIndex: a.CreateIndex,
		ModifyIndex: a.ModifyIndex,
	}
}

// Validate is used to check a token for reasonableness
func (a *ACLToken) Validate() error {
	var mErr multierror.Error
	if len(a.Name) >
		maxTokenNameLength {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long"))
	}
	// Client tokens require at least one policy; management tokens are a
	// superset of all policies and must not carry any.
	switch a.Type {
	case ACLClientToken:
		if len(a.Policies) == 0 {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies"))
		}
	case ACLManagementToken:
		if len(a.Policies) != 0 {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies"))
		}
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management"))
	}
	return mErr.ErrorOrNil()
}

// PolicySubset checks if a given set of policies is a subset of the token
func (a *ACLToken) PolicySubset(policies []string) bool {
	// Hot-path the management tokens, superset of all policies.
	if a.Type == ACLManagementToken {
		return true
	}
	associatedPolicies := make(map[string]struct{}, len(a.Policies))
	for _, policy := range a.Policies {
		associatedPolicies[policy] = struct{}{}
	}
	for _, policy := range policies {
		if _, ok := associatedPolicies[policy]; !ok {
			return false
		}
	}
	return true
}

// ACLTokenListRequest is used to request a list of tokens
type ACLTokenListRequest struct {
	GlobalOnly bool
	QueryOptions
}

// ACLTokenSpecificRequest is used to query a specific token
type ACLTokenSpecificRequest struct {
	AccessorID string
	QueryOptions
}

// ACLTokenSetRequest is used to query a set of tokens
type ACLTokenSetRequest struct {
	// NOTE(review): field name uses "IDS" rather than the conventional
	// "IDs"; kept as-is because it is part of the exported API surface.
	AccessorIDS []string
	QueryOptions
}

// ACLTokenListResponse is used for a list request
type ACLTokenListResponse struct {
	Tokens []*ACLTokenListStub
	QueryMeta
}

// SingleACLTokenResponse is used to return a single token
type SingleACLTokenResponse struct {
	Token *ACLToken
	QueryMeta
}

// ACLTokenSetResponse is used to return a set of tokens
type ACLTokenSetResponse struct {
	Tokens map[string]*ACLToken // Keyed by Accessor ID
	QueryMeta
}

// ResolveACLTokenRequest is used to resolve a specific token
type ResolveACLTokenRequest struct {
	SecretID string
	QueryOptions
}

// ResolveACLTokenResponse is used to resolve a single token
type ResolveACLTokenResponse struct {
	Token *ACLToken
	QueryMeta
}

// ACLTokenDeleteRequest is used to delete a set of tokens
type ACLTokenDeleteRequest struct {
	AccessorIDs []string
	WriteRequest
}

// ACLTokenBootstrapRequest is used to bootstrap ACLs
type ACLTokenBootstrapRequest struct {
	Token      *ACLToken // Not client specifiable
	ResetIndex uint64    // Reset index is used to clear the bootstrap token
	WriteRequest
}

// ACLTokenUpsertRequest is used to upsert a set of tokens
type ACLTokenUpsertRequest struct {
	Tokens []*ACLToken
	WriteRequest
}

// ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
type ACLTokenUpsertResponse struct {
	Tokens []*ACLToken
	WriteMeta
}

// OneTimeToken is used to log into the web UI using a token provided by the
// command line.
type OneTimeToken struct {
	OneTimeSecretID string
	AccessorID      string
	ExpiresAt       time.Time
	CreateIndex     uint64
	ModifyIndex     uint64
}

// OneTimeTokenUpsertRequest is the request for a UpsertOneTimeToken RPC
type OneTimeTokenUpsertRequest struct {
	WriteRequest
}

// OneTimeTokenUpsertResponse is the response to a UpsertOneTimeToken RPC.
type OneTimeTokenUpsertResponse struct {
	OneTimeToken *OneTimeToken
	WriteMeta
}

// OneTimeTokenExchangeRequest is a request to swap the one-time token with
// the backing ACL token
type OneTimeTokenExchangeRequest struct {
	OneTimeSecretID string
	WriteRequest
}

// OneTimeTokenExchangeResponse is the response to swapping the one-time token
// with the backing ACL token
type OneTimeTokenExchangeResponse struct {
	Token *ACLToken
	WriteMeta
}

// OneTimeTokenDeleteRequest is a request to delete a group of one-time tokens
type OneTimeTokenDeleteRequest struct {
	AccessorIDs []string
	WriteRequest
}

// OneTimeTokenExpireRequest is a request to delete all expired one-time tokens
type OneTimeTokenExpireRequest struct {
	WriteRequest
}

// RpcError is used for serializing errors with a potential error code
// NOTE(review): Go convention would name this RPCError, but the exported
// name is part of the API surface and is kept as-is.
type RpcError struct {
	Message string
	Code    *int64
}

// NewRpcError wraps an error's message and an optional code into an RpcError.
func NewRpcError(err error, code *int64) *RpcError {
	return &RpcError{
		Message: err.Error(),
		Code:    code,
	}
}

// Error implements the error interface.
func (r *RpcError) Error() string {
	return r.Message
}