Skip to content

Commit 522fd50

Browse files
committed
Add schema for cassandra v4.0.
We don't need to add v003tov004.sh because the change (removal of dclocal_read_repair_chance config) is in table metadata which gets handled seamlessly within cassandra upgrade when users upgrade their cassandra cluster from v3.11 to v4.0. Refactor cassandra integration tests to run on both cassandra v3.11 and v4.0. Signed-off-by: Ashmita Bohara <ashmita.bohara152@gmail.com>
1 parent a5ced10 commit 522fd50

File tree

3 files changed

+227
-20
lines changed

3 files changed

+227
-20
lines changed

.github/workflows/ci-cassandra.yml

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ on:
99
jobs:
1010
cassandra:
1111
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
version:
15+
- image: cassandra
16+
tag: 3.11
17+
schema: v001 v002 v003
18+
- image: cassandra
19+
tag: 4.0
20+
schema: v004 v004 v004
21+
name: ${{ matrix.version.image }} ${{ matrix.version.tag }}
1222
steps:
1323
- uses: actions/checkout@v2.3.4
1424

@@ -17,4 +27,20 @@ jobs:
1727
go-version: ^1.17
1828

1929
- name: Run cassandra integration tests
20-
run: bash scripts/cassandra-integration-test.sh
30+
run: bash scripts/cassandra-integration-test.sh ${{ matrix.version.schema }}
31+
services:
32+
cassandra1:
33+
image: ${{ matrix.version.image }}:${{ matrix.version.tag }}
34+
ports:
35+
- 9042:9042
36+
- 9160:9160
37+
cassandra2:
38+
image: ${{ matrix.version.image }}:${{ matrix.version.tag }}
39+
ports:
40+
- 9043:9042
41+
- 9161:9160
42+
cassandra3:
43+
image: ${{ matrix.version.image }}:${{ matrix.version.tag }}
44+
ports:
45+
- 9044:9042
46+
- 9162:9160
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
--
-- Creates Cassandra keyspace with tables for traces and dependencies.
--
-- Required parameters:
--
--   keyspace
--     name of the keyspace
--   replication
--     replication strategy for the keyspace, such as
--       for prod environments
--         {'class': 'NetworkTopologyStrategy', '$datacenter': '${replication_factor}' }
--       for test environments
--         {'class': 'SimpleStrategy', 'replication_factor': '1'}
--   trace_ttl
--     default time to live for trace data, in seconds
--   dependencies_ttl
--     default time to live for dependencies data, in seconds (0 for no TTL)
--
-- Non-configurable settings:
--   gc_grace_seconds is non-zero, see: http://www.uberobert.com/cassandra_gc_grace_disables_hinted_handoff/
--   For TTL of 2 days, compaction window is 1 hour, rule of thumb here: http://thelastpickle.com/blog/2016/12/08/TWCS-part1.html

CREATE KEYSPACE IF NOT EXISTS ${keyspace} WITH replication = ${replication};

CREATE TYPE IF NOT EXISTS ${keyspace}.keyvalue (
    key          text,
    value_type   text,
    value_string text,
    value_bool   boolean,
    value_long   bigint,
    value_double double,
    value_binary blob,
);

CREATE TYPE IF NOT EXISTS ${keyspace}.log (
    ts     bigint, // microseconds since epoch
    fields list<frozen<keyvalue>>,
);

CREATE TYPE IF NOT EXISTS ${keyspace}.span_ref (
    ref_type text,
    trace_id blob,
    span_id  bigint,
);

CREATE TYPE IF NOT EXISTS ${keyspace}.process (
    service_name text,
    tags         list<frozen<keyvalue>>,
);

-- Notice we have span_hash. This exists only for zipkin backwards compat. Zipkin allows spans with the same ID.
-- Note: Cassandra re-orders non-PK columns alphabetically, so the table looks different in CQLSH "describe table".
-- start_time is bigint instead of timestamp as we require microsecond precision
CREATE TABLE IF NOT EXISTS ${keyspace}.traces (
    trace_id       blob,
    span_id        bigint,
    span_hash      bigint,
    parent_id      bigint,
    operation_name text,
    flags          int,
    start_time     bigint, // microseconds since epoch
    duration       bigint, // microseconds
    tags           list<frozen<keyvalue>>,
    logs           list<frozen<log>>,
    refs           list<frozen<span_ref>>,
    process        frozen<process>,
    PRIMARY KEY (trace_id, span_id, span_hash)
)
    WITH compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

CREATE TABLE IF NOT EXISTS ${keyspace}.service_names (
    service_name text,
    PRIMARY KEY (service_name)
)
    WITH compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

CREATE TABLE IF NOT EXISTS ${keyspace}.operation_names_v2 (
    service_name   text,
    span_kind      text,
    operation_name text,
    PRIMARY KEY ((service_name), span_kind, operation_name)
)
    WITH compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

-- index of trace IDs by service + operation names, sorted by span start_time.
CREATE TABLE IF NOT EXISTS ${keyspace}.service_operation_index (
    service_name   text,
    operation_name text,
    start_time     bigint, // microseconds since epoch
    trace_id       blob,
    PRIMARY KEY ((service_name, operation_name), start_time)
) WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

CREATE TABLE IF NOT EXISTS ${keyspace}.service_name_index (
    service_name text,
    bucket       int,
    start_time   bigint, // microseconds since epoch
    trace_id     blob,
    PRIMARY KEY ((service_name, bucket), start_time)
) WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

CREATE TABLE IF NOT EXISTS ${keyspace}.duration_index (
    service_name   text,      // service name
    operation_name text,      // operation name, or blank for queries without span name
    bucket         timestamp, // time bucket, - the start_time of the given span rounded to an hour
    duration       bigint,    // span duration, in microseconds
    start_time     bigint,    // microseconds since epoch
    trace_id       blob,
    PRIMARY KEY ((service_name, operation_name, bucket), duration, start_time, trace_id)
) WITH CLUSTERING ORDER BY (duration DESC, start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

-- a bucketing strategy may have to be added for tag queries
-- we can make this table even better by adding a timestamp to it
CREATE TABLE IF NOT EXISTS ${keyspace}.tag_index (
    service_name text,
    tag_key      text,
    tag_value    text,
    start_time   bigint, // microseconds since epoch
    trace_id     blob,
    span_id      bigint,
    PRIMARY KEY ((service_name, tag_key, tag_value), start_time, trace_id, span_id)
)
    WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'compaction_window_size': '1',
        'compaction_window_unit': 'HOURS',
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes

CREATE TYPE IF NOT EXISTS ${keyspace}.dependency (
    parent     text,
    child      text,
    call_count bigint,
    source     text,
);

-- compaction strategy is intentionally different as compared to other tables due to the size of dependencies data
CREATE TABLE IF NOT EXISTS ${keyspace}.dependencies_v2 (
    ts_bucket    timestamp,
    ts           timestamp,
    dependencies list<frozen<dependency>>,
    PRIMARY KEY (ts_bucket, ts)
) WITH CLUSTERING ORDER BY (ts DESC)
    AND compaction = {
        'min_threshold': '4',
        'max_threshold': '32',
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
    }
    AND default_time_to_live = ${dependencies_ttl};

scripts/cassandra-integration-test.sh

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
set -ex

# Usage: cassandra-integration-test.sh <schema1> <schema2> <schema3>
#
# Applies one schema version per cassandra instance. The three cassandra
# containers are started externally (by the CI workflow's service containers)
# and are reachable on localhost ports 9042, 9043, and 9044.
if [ "$#" -ne 3 ]; then
  echo "usage: $0 <schema1> <schema2> <schema3>" >&2
  exit 1
fi

# Build the schema container and run it rather than using the existing container in Docker Hub since that
# requires this current build to succeed before this test can use it; chicken and egg problem.
docker build -t jaeger-cassandra-schema-integration-test plugin/storage/cassandra/
# Positional parameters are quoted to avoid word-splitting/globbing surprises.
docker run -e CQLSH_HOST=localhost -e CQLSH_PORT=9042 -e TEMPLATE=/cassandra-schema/"$1".cql.tmpl --network=host jaeger-cassandra-schema-integration-test
docker run -e CQLSH_HOST=localhost -e CQLSH_PORT=9043 -e TEMPLATE=/cassandra-schema/"$2".cql.tmpl --network=host jaeger-cassandra-schema-integration-test
docker run -e CQLSH_HOST=localhost -e CQLSH_PORT=9044 -e TEMPLATE=/cassandra-schema/"$3".cql.tmpl --network=host jaeger-cassandra-schema-integration-test

# Run the test.
export STORAGE=cassandra
make storage-integration-test

0 commit comments

Comments
 (0)