Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .github/workflows/cve-scan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# CVE scan for default chart images when Chart.yaml or values.yaml (image defaults) change.
# Generates a cve-report.md and uploads it as an artifact.
name: CVE Scan (Chart Images)

on:
  pull_request:
    branches: [ master ]
    paths:
      - 'charts/dify/Chart.yaml'
      # Image repositories/tags are read from values.yaml by the extractor script,
      # so changes there must trigger the scan as well.
      - 'charts/dify/values.yaml'

permissions:
  contents: read
  security-events: write # optional: for SARIF upload

jobs:
  cve-scan:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Trivy
        run: |
          sudo apt-get update -qq && sudo apt-get install -y wget
          TRIVY_VERSION="0.49.0"
          wget -q "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz"
          tar -xzf "trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz" -C /usr/local/bin trivy
          trivy --version

      - name: Extract default images from chart
        id: images
        run: |
          # The default run shell has no pipefail: without this, a failing extractor
          # is masked by tee, count becomes 0 and every later step is silently skipped.
          set -o pipefail
          pip install --quiet pyyaml
          python3 ci/scripts/extract-chart-images.py | tee image-list.txt
          echo "count=$(wc -l < image-list.txt)" >> "$GITHUB_OUTPUT"

      - name: Run Trivy on each image
        if: steps.images.outputs.count != '0'
        run: |
          mkdir -p trivy-results
          while IFS= read -r image; do
            [ -z "$image" ] && continue
            # File name form of the image ref: "/" and ":" become "_".
            safe=$(echo "$image" | tr '/:' '__')
            # Best effort: one failing image must not abort the remaining scans.
            trivy image --format json --output "trivy-results/${safe}.json" --timeout 10m "$image" || true
          done < image-list.txt

      - name: Generate CVE report
        if: steps.images.outputs.count != '0'
        run: |
          app_version=$(grep -E '^appVersion:' charts/dify/Chart.yaml | sed -n 's/.*"\(.*\)".*/\1/p' || true)
          python3 ci/scripts/trivy-report-to-md.py trivy-results --version "${app_version:-unknown}" > cve-report.md
          echo "## Container Security Scan (CVE Report)" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          cat cve-report.md >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"

      - name: Upload CVE report
        if: steps.images.outputs.count != '0'
        uses: actions/upload-artifact@v4
        with:
          name: cve-report
          path: cve-report.md
51 changes: 51 additions & 0 deletions ci/scripts/extract-chart-images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""
Extract default container images from the Dify Helm chart (values.yaml + Chart.yaml).
Outputs one image reference per line: repository:tag
Used by CVE scan workflow to know which images to scan when Chart/values change.
"""
from pathlib import Path
import sys
import yaml

# Keys that use Chart.AppVersion as default tag in templates (api, web, pluginDaemon)
APPVERSION_KEYS = ("api", "web", "pluginDaemon")


def _scalar_text(value) -> str:
    """Return a parsed YAML scalar as stripped text; ``None`` becomes ``""``.

    Unquoted YAML values such as ``1.10`` are parsed as numbers, so the value
    is converted with ``str`` before any string method is applied.
    """
    return "" if value is None else str(value).strip()


def main():
    """Print the chart's default images, one ``repository:tag`` per line.

    Reads ``charts/dify/values.yaml`` and ``charts/dify/Chart.yaml`` relative
    to the repository root (two directories above this script). For the keys
    in ``APPVERSION_KEYS`` an empty tag falls back to the chart's
    ``appVersion``; any tag that is still empty becomes ``latest``. Output is
    de-duplicated and sorted.

    Exits with status 1 (message on stderr) when either file is missing.
    """
    repo_root = Path(__file__).resolve().parents[2]
    chart_dir = repo_root / "charts" / "dify"
    values_path = chart_dir / "values.yaml"
    chart_path = chart_dir / "Chart.yaml"

    if not values_path.exists():
        print("values.yaml not found", file=sys.stderr)
        sys.exit(1)
    if not chart_path.exists():
        print("Chart.yaml not found", file=sys.stderr)
        sys.exit(1)

    # safe_load returns None for an empty document; treat that as an empty mapping.
    with open(values_path, encoding="utf-8") as f:
        values = yaml.safe_load(f) or {}
    with open(chart_path, encoding="utf-8") as f:
        chart = yaml.safe_load(f) or {}

    app_version = _scalar_text(chart.get("appVersion")).strip('"')
    image_config = values.get("image") or {}

    images = set()
    for key in ("api", "web", "sandbox", "proxy", "ssrfProxy", "pluginDaemon"):
        block = image_config.get(key)
        if not block:
            continue
        repo = _scalar_text(block.get("repository"))
        tag = _scalar_text(block.get("tag")).strip('"')
        if key in APPVERSION_KEYS and not tag:
            tag = app_version
        if repo:
            images.add(f"{repo}:{tag or 'latest'}")

    for img in sorted(images):
        print(img)


if __name__ == "__main__":
    main()
161 changes: 161 additions & 0 deletions ci/scripts/trivy-report-to-md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Read Trivy JSON scan results (one file per image) and emit a cve-report.md
in the same format as Langgenius/Dify container security scan reports.
"""
from pathlib import Path
import json
import sys
from datetime import datetime
from collections import defaultdict

# Prefix that marks "Langgenius Supported Images"; every other image is "Third-Party".
# If ArtifactName is missing the name comes from path.stem instead, and the
# workflow sanitizes "/" and ":" to "_", so "langgenius/" appears as "langgenius_".
LANGGENIUS_PREFIX = "langgenius/"
LANGGENIUS_PREFIX_FALLBACK = "langgenius_"  # sanitized filename form


def is_langgenius(name: str) -> bool:
    """Report whether *name* begins with either Langgenius image prefix."""
    return name.startswith((LANGGENIUS_PREFIX, LANGGENIUS_PREFIX_FALLBACK))


def slug(name: str) -> str:
    """Turn an image reference into a short display name.

    For example ``langgenius/dify-api:1.10.1`` becomes ``dify-api-1-10-1``.

    Args:
        name: Either a normal ``repo[:tag]`` reference, or the sanitized
            file-stem form in which ``/`` and ``:`` were replaced by ``_``.

    Returns:
        ``<last repo path component>-<tag>`` with dots turned into dashes
        (the tag defaults to ``latest``). For the sanitized ``langgenius_``
        form: the text after the prefix with underscores turned into dashes,
        or *name* itself when nothing follows the prefix.
    """
    # Sanitized fallback: langgenius_dify-api_1.10.1 -> dify-api-1.10.1
    if not ("/" in name or ":" in name) and name.startswith(LANGGENIUS_PREFIX_FALLBACK):
        rest = name[len(LANGGENIUS_PREFIX_FALLBACK):].replace("_", "-")
        return rest if rest else name
    # Normal repo[:tag] form. The last colon separates the tag only when no "/"
    # follows it; otherwise it belongs to a registry host:port
    # (e.g. localhost:5000/app) and the reference carries no tag.
    repo, sep, tag = name.rpartition(":")
    if not sep or "/" in tag:
        repo, tag = name, "latest"
    base = repo.rsplit("/", 1)[-1]
    return f"{base}-{tag}".replace(".", "-")


def count_severities(vulns: list) -> tuple[int, int]:
    """Tally CRITICAL and HIGH entries in a list of vulnerability dicts.

    Each entry's ``Severity`` is compared case-insensitively; a missing or
    empty value counts as neither.

    Returns:
        A ``(critical, high)`` pair of counts.
    """
    critical = high = 0
    for entry in vulns:
        level = (entry.get("Severity") or "").upper()
        if level == "CRITICAL":
            critical += 1
        elif level == "HIGH":
            high += 1
    return critical, high


def load_result(path: Path) -> tuple[str, int, int] | None:
"""Load one Trivy JSON file. Return (artifact_name, critical, high) or None."""
try:
with open(path) as f:
data = json.load(f)
except (json.JSONDecodeError, OSError):
return None
name = data.get("ArtifactName") or path.stem
total_c, total_h = 0, 0
for res in data.get("Results") or []:
vulns = res.get("Vulnerabilities") or []
c, h = count_severities(vulns)
total_c += c
total_h += h
return (name, total_c, total_h)


def _section_lines(title: str, rows: list[tuple[str, int, int]]) -> list[str]:
    """Render one ``### <title>`` section: per-image counts, a summary, then a rule.

    Args:
        title: Section heading, also used in the ``**<title> Summary:**`` line.
        rows: ``(display_name, critical, high)`` per image.

    Returns:
        Markdown lines for the section. The summary is emitted only when
        *rows* is non-empty; the closing ``---`` is always emitted.
    """
    out = [f"### {title}", ""]
    for display, c, h in rows:
        out += [
            f"#### {display}",
            f"- **CRITICAL vulnerabilities:** {c}",
            f"- **HIGH vulnerabilities:** {h}",
            "",
        ]
    if rows:
        out += [
            f"**{title} Summary:**",
            f"- **CRITICAL:** {sum(r[1] for r in rows)}",
            f"- **HIGH:** {sum(r[2] for r in rows)}",
            "",
        ]
    out += ["---", ""]
    return out


def main():
    """Print a Markdown CVE report built from a directory of Trivy JSON files.

    Command line: ``trivy-report-to-md.py <dir> [--version X.Y.Z]``.
    The version defaults to ``1.0``. Every ``*.json`` file in the directory
    is loaded with ``load_result``; images are split into Langgenius and
    third-party groups and the report is written to stdout.

    Exits with status 1 (message on stderr) when the directory argument is
    missing or is not a directory.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    if len(sys.argv) < 2:
        print("Usage: trivy-report-to-md.py <dir-with-trivy-*.json> [--version X.Y.Z]", file=sys.stderr)
        sys.exit(1)
    args = sys.argv[1:]
    report_dir = Path(args[0])
    version = "1.0"
    i = 1
    while i < len(args):
        # A trailing "--version" without a value is ignored.
        if args[i] == "--version" and i + 1 < len(args):
            version = args[i + 1]
            i += 2
            continue
        i += 1

    if not report_dir.is_dir():
        print(f"Not a directory: {report_dir}", file=sys.stderr)
        sys.exit(1)

    # Collect (critical, high) per artifact name; a later file with the same
    # name replaces an earlier one.
    by_image: dict[str, tuple[int, int]] = {}
    for f in sorted(report_dir.glob("*.json")):
        row = load_result(f)
        if row:
            name, c, h = row
            by_image[name] = (c, h)

    langgenius: list[tuple[str, int, int]] = []
    third_party: list[tuple[str, int, int]] = []
    for name in sorted(by_image):
        c, h = by_image[name]
        group = langgenius if is_langgenius(name) else third_party
        group.append((slug(name), c, h))

    # Aware UTC timestamp; datetime.utcnow() is deprecated since Python 3.12.
    scan_date = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    lines = [
        "# Container Security Scan Results",
        "",
        f"**Version:** {version}",
        "",
        f"**Scan Date:** {scan_date}",
        "",
        "## Scan Results Summary",
        "",
    ]
    lines += _section_lines("Langgenius Supported Images", langgenius)
    lines += _section_lines("Third-Party Images", third_party)

    # Total Summary
    lc = sum(x[1] for x in langgenius)
    lh = sum(x[2] for x in langgenius)
    tc = sum(x[1] for x in third_party)
    th = sum(x[2] for x in third_party)
    lines.append("## Total Summary")
    lines.append(f"- **Total CRITICAL vulnerabilities:** {lc + tc} (Langgenius: {lc}, Third-Party: {tc})")
    lines.append(f"- **Total HIGH vulnerabilities:** {lh + th} (Langgenius: {lh}, Third-Party: {th})")
    lines.append("")

    print("\n".join(lines))


if __name__ == "__main__":
    main()