1local service_google = import '../service/google.libsonnet';
2
// TODO(dcunnin): Separate into 5 mixins:
// 1) mixins that set up Cassandra by a) grabbing the JAR from cassandra.org, b) using Debian
// 2) mixins that set up infrastructure on a) GCP, b) AWS
// 3) a mixin that does the common stuff (different types of nodes, Cassandra bootstrap)
7
8
9{
10
11  // Unchanged from Cassandra distribution
12  local default_conf = {
13    authenticator: 'AllowAllAuthenticator',
14    authorizer: 'AllowAllAuthorizer',
15    auto_snapshot: true,
16    batch_size_warn_threshold_in_kb: 5,
17    batchlog_replay_throttle_in_kb: 1024,
18    cas_contention_timeout_in_ms: 1000,
19    client_encryption_options: {
20      enabled: false,
21      keystore: 'conf/.keystore',
22      keystore_password: 'cassandra',
23    },
24    cluster_name: 'Unnamed Cluster',
25    column_index_size_in_kb: 64,
26    commit_failure_policy: 'stop',
27    commitlog_directory: '/var/lib/cassandra/commitlog',
28    commitlog_segment_size_in_mb: 32,
29    commitlog_sync: 'periodic',
30    commitlog_sync_period_in_ms: 10000,
31    compaction_throughput_mb_per_sec: 16,
32    concurrent_counter_writes: 32,
33    concurrent_reads: 32,
34    concurrent_writes: 32,
35    counter_cache_save_period: 7200,
36    counter_cache_size_in_mb: null,
37    counter_write_request_timeout_in_ms: 5000,
38    cross_node_timeout: false,
39    data_file_directories: ['/var/lib/cassandra/data'],
40    disk_failure_policy: 'stop',
41    dynamic_snitch_badness_threshold: 0.1,
42    dynamic_snitch_reset_interval_in_ms: 600000,
43    dynamic_snitch_update_interval_in_ms: 100,
44    endpoint_snitch: 'SimpleSnitch',
45    hinted_handoff_enabled: true,
46    hinted_handoff_throttle_in_kb: 1024,
47    incremental_backups: false,
48    index_summary_capacity_in_mb: null,
49    index_summary_resize_interval_in_minutes: 60,
50    inter_dc_tcp_nodelay: false,
51    internode_compression: 'all',
52    key_cache_save_period: 14400,
53    key_cache_size_in_mb: null,
54    listen_address: 'localhost',
55    max_hint_window_in_ms: 10800000,
56    max_hints_delivery_threads: 2,
57    memtable_allocation_type: 'heap_buffers',
58    native_transport_port: 9042,
59    num_tokens: 256,
60    partitioner: 'org.apache.cassandra.dht.Murmur3Partitioner',
61    permissions_validity_in_ms: 2000,
62    range_request_timeout_in_ms: 10000,
63    read_request_timeout_in_ms: 5000,
64    request_scheduler: 'org.apache.cassandra.scheduler.NoScheduler',
65    request_timeout_in_ms: 10000,
66    row_cache_save_period: 0,
67    row_cache_size_in_mb: 0,
68    rpc_address: 'localhost',
69    rpc_keepalive: true,
70    rpc_port: 9160,
71    rpc_server_type: 'sync',
72    saved_caches_directory: '/var/lib/cassandra/saved_caches',
73    seed_provider: [
74      {
75        class_name: 'org.apache.cassandra.locator.SimpleSeedProvider',
76        parameters: [{ seeds: '127.0.0.1' }],
77      },
78    ],
79    server_encryption_options: {
80      internode_encryption: 'none',
81      keystore: 'conf/.keystore',
82      keystore_password: 'cassandra',
83      truststore: 'conf/.truststore',
84      truststore_password: 'cassandra',
85    },
86    snapshot_before_compaction: false,
87    ssl_storage_port: 7001,
88    sstable_preemptive_open_interval_in_mb: 50,
89    start_native_transport: true,
90    start_rpc: true,
91    storage_port: 7000,
92    thrift_framed_transport_size_in_mb: 15,
93    tombstone_failure_threshold: 100000,
94    tombstone_warn_threshold: 1000,
95    trickle_fsync: false,
96    trickle_fsync_interval_in_kb: 10240,
97    truncate_request_timeout_in_ms: 60000,
98    write_request_timeout_in_ms: 2000,
99  },
100
101  // A line of bash that will wait for a Cassandra service to be "up".
102  local wait_for_cqlsh(user, pass, host) =
103    'while ! echo show version | cqlsh -u %s -p %s %s ; do sleep 1; done' % [user, pass, host],
104
105  // A Cassandra database service.  This supports:
106  // - Installation of Cassandra
107  // - Bootstrapping of the database in a secure manner
108  // - Configuring a replicated database
109  // - Opening all ports through the firewall
110  // - Jmx monitoring for Cassandra
111  // - Independent management of the boot disks, to allow recreation of the instances without
112  // losing the disks.
113  // TODO: changing the base image still replaces the disk, losing the data.  Using a non-OS
114  // disk for the database would fix this.
115  //
116  // How to use:
  // Create a service with a single StarterNode that has initReplicationFactor set.  All other
  // nodes are TopUpNodes.  Do not use Instance directly as a node, although you can extend it to
  // add base config that should apply to both StarterNode and TopUpNode.  Once the cluster is up
  // and running, you can replace the StarterNode with a TopUpNode.
121  //
122  // cassandra.GcpDebianCassandra(..., ...) {
123  //     ...
124  //     nodes: {
125  //         n1: StarterNode {
126  //             initReplicationFactor: 3,
127  //             zone: "us-central1-b",
128  //         },
129  //         n2: TopUpNode {
130  //             zone: "us-central1-c",
131  //         },
132  //
133  //         n3: TopUpNode {
134  //             zone: "us-central1-f",
135  //         },
136  //     },
137  // }
138  GcpDebianCassandra(outer, name): service_google.InstanceBasedService(outer, name) {
139
    // Alias for the service object, so that nested objects (where `self` is rebound) can still
    // reach service-level fields.
    local service = self,

    // Set the root password here.  Required: evaluation fails if it is left unset.
    rootPassword:: error 'Cassandra Service must have field: rootPassword',

    // This is the name Cassandra uses to identify peers that form quorum on the same database.
    // It can be overridden but this default seems reasonable.
    clusterName:: self.fullName,

    // The cassandra.yaml content for fully configured nodes: the stock defaults with password
    // authentication switched on and the cluster name set.
    cassandraConf:: default_conf {
      authenticator: 'PasswordAuthenticator',
      cluster_name: service.clusterName,
    },

    // Required: an object mapping node names to StarterNode / TopUpNode objects.
    nodes:: error 'Configure nodes for the cassandra database.',

    gossipPorts:: ['7000', '7001', '7199'],  // Only between this pool.
    // Client-facing ports, appended to the inherited firewall port list.
    fwTcpPorts+: ['9042', '9160'],
158
159    Instance+: {
160      local node = self,
161
162      // Beefier machine type, since this is Java.
163      machine_type: 'n1-standard-1',
164      rootPassword:: service.rootPassword,
165      conf:: service.cassandraConf,
166
167      // Aside from installing Cassandra, we initialise the database using a bootstrap configuration
168      // and some simple SQL to set the root password.  Further configuration is done not in Packer
169      // but as the Instances boot for the first time.
170      StandardRootImage+: {
171        aptKeyUrls+: ['https://www.apache.org/dist/cassandra/KEYS'],
172        aptRepoLines+: {
173          cassandra: 'deb http://www.apache.org/dist/cassandra/debian 311x main',
174        },
175        aptPackages+: ['cassandra'],
176
177        local bootstrap_conf = default_conf {
178          authenticator: 'PasswordAuthenticator',
179        },
180
181        cmds+: [
182          '# Shut it down',
183          '/etc/init.d/cassandra stop',
184
185          '# Remove junk from unconfigured startup',
186          'rm -rfv /var/lib/cassandra/*',
187
188          '# Enable authentication',
189          local dest = '/etc/cassandra/cassandra.yaml';
190          'echo %s > %s' % [std.escapeStringBash('' + bootstrap_conf), dest],
191
192          '# Start it up again (for some reason "start" does not do anything...)',
193          '/etc/init.d/cassandra restart',
194
195          '# Wait for it to be ready',
196          wait_for_cqlsh('cassandra', 'cassandra', 'localhost'),
197
198          '# Set root password.',
199          local cql = "ALTER USER cassandra WITH PASSWORD '%s';" % node.rootPassword;
200          'echo %s | cqlsh -u cassandra -p cassandra' % std.escapeStringBash(cql),
201        ],
202      },
203
204      enableMonitoring: self.supportsMonitoring,
205      enableJmxMonitoring: self.supportsJmxMonitoring,
206      jmxHost: 'localhost',
207      jmxPort: 7199,
208      jmxLocalhostConfig+: {
209        SdCassQuery:: self.SdQuery {
210          attr: [
211            'ActiveCount',
212            'CompletedTasks',
213            'CurrentlyBlockedTasks',
214            'PendingTasks',
215          ],
216        },
217        queries+: [
218          self.SdQuery {
219            resultAlias: 'cassandra.storageservice',
220            obj: 'org.apache.cassandra.db:type=StorageService',
221            attr: ['Load', 'ExceptionCount'],
222          },
223          self.SdQuery {
224            resultAlias: 'cassandra.commitlog',
225            obj: 'org.apache.cassandra.db:type=Commitlog',
226            attr: ['CompletedTasks', 'PendingTasks', 'TotalCommitlogSize'],
227          },
228          self.SdQuery {
229            resultAlias: 'cassandra.compactionmanager',
230            obj: 'org.apache.cassandra.db:type=CompactionManager',
231            attr: ['PendingTasks', 'CompletedTasks'],
232          },
233          self.SdQuery {
234            resultAlias: 'cassandra.stage.MutationStage',
235            obj: 'org.apache.cassandra.request:type=MutationStage',
236          },
237          self.SdCassQuery {
238            resultAlias: 'cassandra.stage.ReadRepairStage',
239            obj: 'org.apache.cassandra.request:type=ReadRepairStage',
240          },
241          self.SdCassQuery {
242            resultAlias: 'cassandra.stage.ReadStage',
243            obj: 'org.apache.cassandra.request:type=ReadStage',
244          },
245          self.SdCassQuery {
246            resultAlias: 'cassandra.stage.ReplicateOnWriteStage',
247            obj: 'org.apache.cassandra.request:type=ReplicateOnWriteStage',
248          },
249          self.SdCassQuery {
250            resultAlias: 'cassandra.stage.RequestResponseStage',
251            obj: 'org.apache.cassandra.request:type=RequestResponseStage',
252          },
253          self.SdCassQuery {
254            resultAlias: 'cassandra.internal.AntiEntropySessions',
255            obj: 'org.apache.cassandra.internal:type=AntiEntropySessions',
256          },
257          self.SdCassQuery {
258            resultAlias: 'cassandra.internal.AntiEntropyStage',
259            obj: 'org.apache.cassandra.internal:type=AntiEntropyStage',
260          },
261          self.SdCassQuery {
262            resultAlias: 'cassandra.internal.FlushWriter',
263            obj: 'org.apache.cassandra.internal:type=FlushWriter',
264          },
265          self.SdCassQuery {
266            resultAlias: 'cassandra.internal.GossipStage',
267            obj: 'org.apache.cassandra.internal:type=GossipStage',
268          },
269          self.SdCassQuery {
270            resultAlias: 'cassandra.internal.HintedHandoff',
271            obj: 'org.apache.cassandra.internal:type=HintedHandoff',
272          },
273          self.SdCassQuery {
274            resultAlias: 'cassandra.internal.InternalResponseStage',
275            obj: 'org.apache.cassandra.internal:type=InternalResponseStage',
276          },
277          self.SdCassQuery {
278            resultAlias: 'cassandra.internal.MemtablePostFlusher',
279            obj: 'org.apache.cassandra.internal:type=MemtablePostFlusher',
280          },
281          self.SdCassQuery {
282            resultAlias: 'cassandra.internal.MigrationStage',
283            obj: 'org.apache.cassandra.internal:type=MigrationStage',
284          },
285          self.SdCassQuery {
286            resultAlias: 'cassandra.internal.MiscStage',
287            obj: 'org.apache.cassandra.internal:type=MiscStage',
288          },
289          self.SdCassQuery {
290            resultAlias: 'cassandra.internal.StreamStage',
291            obj: 'org.apache.cassandra.internal:type=StreamStage',
292          },
293          self.SdQuery {
294            resultAlias: 'cassandra.internal.StorageProxy',
295            obj: 'org.apache.cassandra.db:type=StorageProxy',
296            attr: [
297              'RecentReadLatencyMicros',
298              'RecentWriteLatencyMicros',
299              'RecentRangeLatencyMicros',
300              'HintsInProgress',
301            ],
302          },
303        ],
304      },
305
306      enableLogging: self.supportsLogging,
307    },
308
309    // Bootstraps the database.
310    // When the database initially boots, it is password protected with rootPassword, but is not
311    // configured otherwise.  We wait for it to start, do some basic configuration and then
312    // restart it with version.conf installed.
313    StarterNode:: self.Instance {
314      initReplicationFactor:: error "Needs 'initReplicationFactor'",
315      initCql:: [],
316      initReplication::
317        "{ 'class' : 'SimpleStrategy', 'replication_factor' : %d }"
318        % self.initReplicationFactor,
319      initAuthReplication:: self.initReplication,
320
321      cmds+: [
322        '# Wait for the misconfigured cassandra to start up.',
323        wait_for_cqlsh('cassandra', self.rootPassword, 'localhost'),
324
325        '# Change the cluster name.',
326        'echo %s | cqlsh -u cassandra -p %s localhost'
327        % [
328          std.escapeStringBash(
329            "UPDATE system.local SET cluster_name = '%s' where key='local';"
330            % self.conf.cluster_name
331          ),
332          self.rootPassword,
333        ],
334
335        '# Set up system_auth replication level.',
336        'echo %s | cqlsh -u cassandra -p %s localhost'
337        % [
338          std.escapeStringBash('ALTER KEYSPACE system_auth WITH REPLICATION = %s;'
339                               % self.initAuthReplication),
340          self.rootPassword,
341        ],
342
343        '# Drop in the correct configuration.',
344        'echo %s > %s'
345        % [std.escapeStringBash('' + self.conf), '/etc/cassandra/cassandra.yaml'],
346
347        '# Restart with new configuration.',
348        '/etc/init.d/cassandra restart',
349
350        '# Wait for the properly configured cassandra to start up and reach quorum.',
351        wait_for_cqlsh('cassandra', self.rootPassword, '$HOSTNAME'),
352
353        // See https://issues.apache.org/jira/browse/CASSANDRA-11942
354        // This was added because otherwise self.initCql cannot set up users.
355        'sleep 10',
356
357        '# Set up users, empty tables, etc.',
358        'echo %s | cqlsh -u cassandra -p %s $HOSTNAME'
359        % [std.escapeStringBash(std.lines(self.initCql)), self.rootPassword],
360      ],
361    },
362
363    // Simply restarts with the given config.  The assumption is that when it starts it will join
364    // with seeds and initialize itself, including receiving any replicated copies of the data it
365    // needs.
366    TopUpNode:: self.Instance {
367      cmds+: [
368        // Wait for the misconfigured cassandra to start up.
369        wait_for_cqlsh('cassandra', self.rootPassword, 'localhost'),
370
371        // Kill it.
372        '/etc/init.d/cassandra stop',
373
374        // Clean up the mess it caused due to being misconfigured.
375        'rm -rf /var/lib/cassandra/*',
376
377        // Drop in the correct configuration.
378        'echo %s > %s'
379        % [std.escapeStringBash('' + self.conf), '/etc/cassandra/cassandra.yaml'],
380
381        // Start it up again.
382        '/etc/init.d/cassandra restart',
383      ],
384    },
385
386    infrastructure+: {
387      google_compute_disk: {
388        [service.prefixName(n)]: {
389          name: service.prefixName(n),
390          image: service.nodes[n].StandardRootImage,
391          zone: service.nodes[n].zone,
392        }
393        for n in std.objectFields(service.nodes)
394      },
395
396      google_compute_instance: {
397        [service.prefixName(n)]: service.nodes[n] {
398          local instance = self,
399          name: service.prefixName(n),
400          networkName: service.networkName,
401          tags+: [service.clusterName],
402          boot_disk: {
403            source: '${google_compute_disk.%s.name}' % instance.name,
404            auto_delete: false,
405          },
406        }
407        for n in std.objectFields(service.nodes)
408      },
409    },
410  },
411}
412