1# Copyright (C) 2015-2021 Regents of the University of California
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13import socket
14
15from argparse import ArgumentParser, _ArgumentGroup
16from contextlib import closing
17from typing import Callable, Union
18
19from toil.batchSystems.registry import (BATCH_SYSTEM_FACTORY_REGISTRY,
20                                        BATCH_SYSTEMS,
21                                        DEFAULT_BATCH_SYSTEM)
22from toil.lib.threading import cpu_count
23
24
def getPublicIP() -> str:
    """Get the IP that this machine uses to contact the internet.

    If behind a NAT, this will still be this computer's IP, and not the
    router's.

    :return: The local address reported by an IPv4 datagram socket after
        connecting it to a reserved test address, or ``'127.0.0.1'`` if that
        lookup fails for any reason.
    """
    try:
        # Try to get the internet-facing IP by attempting a connection
        # to a non-existent server and reading what IP was used.
        with closing(socket.socket(socket.AF_INET, socket.SOCK_DGRAM)) as sock:
            # 203.0.113.0/24 is reserved as TEST-NET-3 by RFC 5737, so
            # there is guaranteed to be no one listening on the other
            # end (and we won't accidentally DOS anyone).
            sock.connect(('203.0.113.1', 1))
            return sock.getsockname()[0]
    except Exception:
        # Something went terribly wrong. Just give loopback rather
        # than killing everything, because this is often called just
        # to provide a default argument.
        # Only Exception is caught (not a bare except) so that
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        return '127.0.0.1'
44
45
def add_parasol_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the Parasol batch system's command-line options on *parser*.

    Adds ``--parasolCommand`` (default ``'parasol'``) and
    ``--parasolMaxBatches`` (default ``1000``).

    :param parser: The parser or argument group to add the options to.
    """
    parser.add_argument("--parasolCommand", dest="parasolCommand", default='parasol',
                        help="The name or path of the parasol program. Will be looked up on PATH "
                             "unless it starts with a slash.  (default: %(default)s).")
    # Parse the value as an integer so that a value given on the command line
    # has the same type as the integer default instead of arriving as a string.
    parser.add_argument("--parasolMaxBatches", dest="parasolMaxBatches", default=1000, type=int,
                        help="Maximum number of job batches the Parasol batch is allowed to create. One batch is "
                             "created for jobs with a unique set of resource requirements.  (default: %(default)s).")
53
54
def add_single_machine_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the single machine batch system's command-line options on *parser*.

    Adds ``--scale`` plus two mutually exclusive on/off flag pairs:
    ``--linkImports``/``--noLinkImports`` (default ``True``) and
    ``--moveExports``/``--noMoveExports`` (default ``False``).

    :param parser: The parser or argument group to add the options to.
    """
    # Parse the scaling factor as a number so that a value given on the
    # command line is numeric like the default, rather than a string.
    parser.add_argument("--scale", dest="scale", default=1, type=float,
                        help="A scaling factor to change the value of all submitted tasks's submitted cores.  "
                             "Used in the single_machine batch system.  (default: %(default)s).")

    # Both flags of a pair write to the same destination and share one help
    # text; the mutually exclusive group rejects passing both at once.
    link_imports = parser.add_mutually_exclusive_group()
    link_imports_help = ("When using a filesystem based job store, CWL input files are by default symlinked in.  "
                         "Specifying this option instead copies the files into the job store, which may protect "
                         "them from being modified externally.  When not specified and as long as caching is enabled, "
                         "Toil will protect the file automatically by changing the permissions to read-only.")
    link_imports.add_argument("--linkImports", dest="linkImports", action='store_true', help=link_imports_help)
    link_imports.add_argument("--noLinkImports", dest="linkImports", action='store_false', help=link_imports_help)
    link_imports.set_defaults(linkImports=True)

    move_exports = parser.add_mutually_exclusive_group()
    move_exports_help = ('When using a filesystem based job store, output files are by default moved to the '
                         'output directory, and a symlink to the moved exported file is created at the initial '
                         'location.  Specifying this option instead copies the files into the output directory.  '
                         'Applies to filesystem-based job stores only.')
    move_exports.add_argument("--moveExports", dest="moveExports", action='store_true', help=move_exports_help)
    move_exports.add_argument("--noMoveExports", dest="moveExports", action='store_false', help=move_exports_help)
    move_exports.set_defaults(moveExports=False)
77
78
def add_mesos_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the Mesos batch system's command-line options on *parser*.

    Adds ``--mesosMaster``, stored as ``mesosMasterAddress``. Its default is
    the result of :func:`getPublicIP`, computed when this function is called,
    followed by port 5050.

    :param parser: The parser or argument group to add the option to.
    """
    default_master = f'{getPublicIP()}:5050'
    master_help = "The host and port of the Mesos master separated by colon.  (default: %(default)s)"
    parser.add_argument("--mesosMaster", dest="mesosMasterAddress", default=default_master, help=master_help)
82
83
def add_kubernetes_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the Kubernetes batch system's command-line options on *parser*.

    Adds ``--kubernetesHostPath``, which defaults to ``None``.

    :param parser: The parser or argument group to add the option to.
    """
    host_path_help = ("Path on Kubernetes hosts to use as shared inter-pod temp directory.  "
                      "(default: %(default)s)")
    parser.add_argument("--kubernetesHostPath", dest="kubernetesHostPath", default=None, help=host_path_help)
88
def add_slurm_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the SLURM batch system's command-line options on *parser*.

    Adds the mutually exclusive flags ``--dont_allocate_mem`` and
    ``--allocate_mem``. Both write to ``allocate_mem``, which defaults to
    ``True``.

    :param parser: The parser or argument group to add the options to.
    """
    allocate_mem = parser.add_mutually_exclusive_group()
    allocate_mem_help = ("A flag that can block allocating memory with '--mem' for job submissions "
                         "on SLURM since some system servers may reject any job request that "
                         "explicitly specifies the memory allocation.  The default is to always allocate memory.")
    # The two flags share one destination and one help text; the group
    # rejects passing both at once.
    allocate_mem.add_argument("--dont_allocate_mem", action='store_false', dest="allocate_mem", help=allocate_mem_help)
    allocate_mem.add_argument("--allocate_mem", action='store_true', dest="allocate_mem", help=allocate_mem_help)
    allocate_mem.set_defaults(allocate_mem=True)
97
def set_batchsystem_options(batch_system: str, set_option: Callable) -> None:
    """Let the named batch system apply its options through *set_option*.

    Looks up *batch_system* in ``BATCH_SYSTEM_FACTORY_REGISTRY``, calls the
    registered factory with no arguments, and passes *set_option* to the
    ``setOptions`` method of whatever the factory returns.

    :param batch_system: Key of the batch system in the factory registry.
    :param set_option: Callable handed unchanged to ``setOptions``.
    """
    make_batch_system = BATCH_SYSTEM_FACTORY_REGISTRY[batch_system]
    make_batch_system().setOptions(set_option)
101
102
def add_all_batchsystem_options(parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
    """Register the general batch system options and every builtin system's options.

    Adds the options shared across batch systems (``--batchSystem``,
    ``--disableHotDeployment``, ``--disableAutoDeployment``,
    ``--maxLocalJobs``, ``--manualMemArgs`` and
    ``--runCwlInternalJobsOnWorkers``) and then the Parasol, single machine,
    Mesos, SLURM and Kubernetes options.

    :param parser: The parser or argument group to add the options to.
    """
    # Look the core count up once; it is both the default for --maxLocalJobs
    # and the number quoted in that option's help text.
    local_cores = cpu_count()

    # TODO: Only add options for the system the user is specifying?
    parser.add_argument("--batchSystem", dest="batchSystem", default=DEFAULT_BATCH_SYSTEM, choices=BATCH_SYSTEMS,
                        help=f"The type of batch system to run the job(s) with, currently can be one "
                             f"of {', '.join(BATCH_SYSTEMS)}. default={DEFAULT_BATCH_SYSTEM}")
    # Legacy spelling: writes to the same destination as --disableAutoDeployment.
    parser.add_argument("--disableHotDeployment", dest="disableAutoDeployment", action='store_true', default=None,
                        help="Hot-deployment was renamed to auto-deployment.  Option now redirects to "
                             "--disableAutoDeployment.  Left in for backwards compatibility.")
    parser.add_argument("--disableAutoDeployment", dest="disableAutoDeployment", action='store_true', default=None,
                        help="Should auto-deployment of the user script be deactivated? If True, the user "
                             "script/package should be present at the same location on all workers.  Default = False.")
    # Parse the value as an integer so that a value given on the command line
    # has the same type as the integer default instead of arriving as a string.
    parser.add_argument("--maxLocalJobs", default=local_cores, type=int,
                        help=f"For batch systems that support a local queue for housekeeping jobs "
                             f"(Mesos, GridEngine, htcondor, lsf, slurm, torque).  Specifies the maximum "
                             f"number of these housekeeping jobs to run on the local system.  The default "
                             f"(equal to the number of cores) is a maximum of {local_cores} concurrent "
                             f"local housekeeping jobs.")
    parser.add_argument("--manualMemArgs", default=False, action='store_true', dest="manualMemArgs",
                        help="Do not add the default arguments: 'hv=MEMORY' & 'h_vmem=MEMORY' to the qsub "
                             "call, and instead rely on TOIL_GRIDGENGINE_ARGS to supply alternative arguments.  "
                             "Requires that TOIL_GRIDGENGINE_ARGS be set.")
    parser.add_argument("--runCwlInternalJobsOnWorkers", dest="runCwlInternalJobsOnWorkers", action='store_true',
                        default=None,
                        help="Whether to run CWL internal jobs (e.g. CWLScatter) on the worker nodes "
                             "instead of the primary node. If false (default), then all such jobs are run on "
                             "the primary node. Setting this to true can speed up the pipeline for very large "
                             "workflows with many sub-workflows and/or scatters, provided that the worker "
                             "pool is large enough.")

    # Options specific to the individual builtin batch systems.
    add_parasol_options(parser)
    add_single_machine_options(parser)
    add_mesos_options(parser)
    add_slurm_options(parser)
    add_kubernetes_options(parser)
137
138
def set_batchsystem_config_defaults(config) -> None:
    """
    Populate *config* with default settings for the builtin batch systems.

    This is required if a Config object is not constructed from an Options
    object. Every entry in the table below is assigned as an attribute of
    *config*, in the order listed.

    :param config: Object that receives the defaults as attributes.
    """
    defaults = {
        # general
        "batchSystem": "single_machine",
        "disableAutoDeployment": False,
        "environment": {},
        "statePollingWait": None,
        "maxLocalJobs": cpu_count(),
        "manualMemArgs": False,

        # parasol
        "parasolCommand": 'parasol',
        # NOTE(review): the --parasolMaxBatches command-line option defaults
        # to 1000, not 10000 -- confirm which value is intended.
        "parasolMaxBatches": 10000,

        # single machine
        "scale": 1,
        # NOTE(review): the --linkImports/--noLinkImports command-line flags
        # default to True, not False -- confirm which value is intended.
        "linkImports": False,
        "moveExports": False,

        # mesos
        "mesosMasterAddress": f'{getPublicIP()}:5050',

        # SLURM
        "allocate_mem": True,

        # Kubernetes
        "kubernetesHostPath": None,
    }
    for attribute, value in defaults.items():
        setattr(config, attribute, value)
168