Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci-build-spm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ jobs:
- name: Temporary - only run the build
run:
cd docker-compose/monitor && make build

- name: Run SPM Test
run: ./scripts/spm-integration-test.sh

2 changes: 1 addition & 1 deletion docker-compose/monitor/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ build: clean-jaeger
.PHONY: dev
dev: export JAEGER_IMAGE_TAG = dev
dev:
docker compose -f docker-compose.yml up
docker compose -f docker-compose.yml up $(DOCKER_COMPOSE_ARGS)

.PHONY: clean-jaeger
clean-jaeger:
Expand Down
109 changes: 109 additions & 0 deletions scripts/spm-integration-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/bin/bash

# Strict-mode settings for the whole script, one option per line:
set -o errexit   # -e: stop at the first unhandled failing command
set -o nounset   # -u: expanding an unset variable is an error
set -o noglob    # -f: disable pathname expansion
set -o pipefail  # a pipeline fails if any of its stages fails
set -o xtrace    # -x: echo every command before it runs (enabled last)

# Compose file describing the monitoring stack, relative to the repo root.
compose_file="docker-compose/monitor/docker-compose.yml"

# Prints the logs of the compose project to stdout, wrapped in
# "::group::" / "::endgroup::" marker lines so CI can fold the output.
# Globals:
#   compose_file (read) - path of the docker compose file
dump_logs() {
  echo "::group:: docker logs"
  # Quoted so that a path containing whitespace stays a single argument.
  docker compose -f "$compose_file" logs
  echo "::endgroup::"
}

# Polls a URL until it answers with HTTP status 200 or the retries run out.
# Arguments:
#   $1 - service name, used only in the log messages
#   $2 - URL to probe
# Outputs:
#   progress messages on stdout
# Returns:
#   0 as soon as the URL answers 200; 1 after 10 unsuccessful attempts
#   (5 seconds apart)
check_service_health() {
  local service_name=$1
  local url=$2
  local wait_seconds=5
  local retry_count=10
  local i status

  echo "Checking health of service: $service_name at $url"
  for ((i = 1; i <= retry_count; i++)); do
    # Compare the numeric status code of the final (post-redirect) response
    # instead of grepping for the "200 OK" text, which is absent on HTTP/2
    # and on servers that omit the reason phrase. "|| true" keeps a refused
    # connection (curl prints 000 and exits non-zero) from aborting the
    # script under "set -e"; it is simply treated as "not healthy yet".
    status=$(curl -s -L -o /dev/null -w '%{http_code}' "$url" || true)
    if [[ "$status" == "200" ]]; then
      echo "$service_name is healthy"
      return 0
    fi
    echo "Waiting for $service_name to be healthy... ($i/$retry_count)"
    sleep "$wait_seconds"
  done

  echo "Error: $service_name did not become healthy in time"
  return 1
}

# Runs the health check for Jaeger, Prometheus and Grafana, in that order.
# Each table entry is "<display name>|<URL to probe>".
wait_for_services() {
  local -a targets=(
    "Jaeger|http://localhost:16686"
    "Prometheus|http://localhost:9090/graph"
    "Grafana|http://localhost:3000"
  )
  local target

  echo "Waiting for services to be up and running..."
  for target in "${targets[@]}"; do
    # Text before the first "|" is the name, everything after it is the URL.
    check_service_health "${target%%|*}" "${target#*|}"
  done
}

# Fetches the "calls" metrics of one service from the Jaeger query API and
# prints the raw response body on stdout.
# Arguments:
#   $1 - service name to query
_spm_fetch_calls() {
  local service=$1
  curl -s "http://localhost:16686/api/metrics/calls?service=${service}&endTs=$(date +%s)000&lookback=1000&step=100&ratePer=60000"
}

# Verifies that the metrics API returns call-rate metrics for every expected
# service.
#
# For each service the function polls the API until the first returned metric
# carries a matching "service_name" label, then keeps polling until more than
# 3 metric points are available, and finally requires every point to be
# non-zero. One 180-second deadline is shared by all services.
#
# Outputs:
#   progress and error messages on stdout
# Exits (not returns) with status 1 when a service never shows up before the
# deadline, has too few metric points, or has a zero-valued point.
check_spm() {
  local timeout=180
  local interval=5
  # A service passes only with MORE than this many metric points; the same
  # threshold drives both the wait loop and the final verdict.
  local min_points=3
  local end_time=$((SECONDS + timeout))
  local -a services_list=("driver" "customer" "mysql" "redis" "frontend" "route" "ui")
  local -a metric_points=()
  local service response service_name value
  local matched all_non_zero non_zero_count

  echo "Checking SPM"
  for service in "${services_list[@]}"; do
    echo "Processing service: $service"

    # Phase 1: wait for the API to report metrics for this service.
    matched=false
    response=""
    while ((SECONDS < end_time)); do
      response=$(_spm_fetch_calls "$service")
      service_name=$(echo "$response" | jq -r 'if .metrics and .metrics[0] then .metrics[0].labels[] | select(.name=="service_name") | .value else empty end')
      if [[ "$service_name" == "$service" ]]; then
        echo "Service name matched with '$service'"
        matched=true
        break
      fi
      echo "Service name does not match '$service'"
      sleep "$interval"
    done
    # An explicit flag avoids misjudging the outcome from the clock alone
    # (a match right at the deadline, or a timeout at exactly end_time).
    if [[ "$matched" != true ]]; then
      echo "Error: no metrics returned by the API for service $service"
      exit 1
    fi

    # Phase 2: wait for enough metric points. The response is re-fetched on
    # every iteration (re-parsing the same body can never yield more points)
    # and the loop is bounded by the shared deadline.
    mapfile -t metric_points < <(echo "$response" | jq -r '.metrics[0].metricPoints[].gaugeValue.doubleValue')
    while ((${#metric_points[@]} <= min_points && SECONDS < end_time)); do
      echo "Metric points for service $service are not yet greater than $min_points"
      sleep "$interval"
      response=$(_spm_fetch_calls "$service")
      mapfile -t metric_points < <(echo "$response" | jq -r '.metrics[0].metricPoints[].gaugeValue.doubleValue')
    done

    # Phase 3: every point must be non-zero.
    all_non_zero=true
    non_zero_count=0
    # Guarded so an empty array is never expanded under "set -u".
    if ((${#metric_points[@]} > 0)); then
      for value in "${metric_points[@]}"; do
        if [[ "$value" == "0" || "$value" == "0.0" ]]; then
          all_non_zero=false
          break
        fi
        non_zero_count=$((non_zero_count + 1))
      done
    fi

    if [[ "$all_non_zero" == true ]] && ((non_zero_count > min_points)); then
      echo "All gauge values are non-zero and count is greater than $min_points for $service"
    else
      echo "Some gauge values are zero or count is not greater than $min_points for $service"
      exit 1
    fi
  done
}

# Dumps the container logs, then stops and removes the compose project.
# Globals:
#   compose_file (read) - path of the docker compose file
teardown_services() {
  # This runs from the exit trap with "set -e" active: a failure while
  # dumping logs must not skip the "down" below and leave containers running.
  dump_logs || echo "Warning: failed to dump docker logs" >&2
  # Quoted so that a path containing whitespace stays a single argument.
  docker compose -f "$compose_file" down
}

# Entry point: builds and starts the monitoring stack, waits for its
# services, then runs the SPM checks.
main() {
  # The build/start steps run in a subshell so the caller's working
  # directory is left untouched; each step runs only if the previous one
  # succeeded. DOCKER_COMPOSE_ARGS="-d" is handed to the "dev" make target.
  (
    cd docker-compose/monitor \
      && make build \
      && make dev DOCKER_COMPOSE_ARGS="-d"
  )

  wait_for_services
  check_spm

  echo "All services are running correctly"
}

# Tear the stack down whenever the script exits, whatever the reason.
trap teardown_services EXIT
# On Ctrl-C just exit (130 = 128 + SIGINT). The EXIT trap above then performs
# the teardown exactly once; registering the teardown on INT as well would
# run it twice and let the script carry on after the interrupt.
trap 'exit 130' INT

# Run the main function
main