diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index fadb7b4d765f..a5bede5c3925 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -23479,6 +23479,13 @@ export interface components { extra_files?: components["schemas"]["ExtraFiles"] | null; /** Hashes */ hashes?: components["schemas"]["FetchDatasetHash"][] | null; + /** + * Headers + * @description Optional headers to include in the URL fetch request + */ + headers?: { + [key: string]: string; + } | null; /** * Info * @description Free text field that can be used to store arbitrary information about the dataset. This used to be prominently diff --git a/config/url_headers_conf.yml.sample b/config/url_headers_conf.yml.sample new file mode 120000 index 000000000000..8f1a15c75fa1 --- /dev/null +++ b/config/url_headers_conf.yml.sample @@ -0,0 +1 @@ +../lib/galaxy/config/sample/url_headers_conf.yml.sample \ No newline at end of file diff --git a/doc/source/admin/enable_headers_in_fetch_requests.md b/doc/source/admin/enable_headers_in_fetch_requests.md new file mode 100644 index 000000000000..9f4ff6f51620 --- /dev/null +++ b/doc/source/admin/enable_headers_in_fetch_requests.md @@ -0,0 +1,152 @@ +# Enabling HTTP Headers in Fetch Requests + +Galaxy allows users to **fetch remote data by URL** (for example via _Upload → Paste/Fetch data_ or via APIs that retrieve external resources). +By default, Galaxy **does not forward any custom HTTP headers** when fetching URLs. This restriction is intentional and is part of Galaxy’s security model. + +Starting with recent Galaxy releases, administrators can **explicitly allow a controlled set of HTTP headers** to be sent with fetch requests, based on the target URL. This enables integrations with authenticated services (e.g. APIs requiring `Authorization` headers) while maintaining strict security boundaries. 
+ +This document explains **how to safely enable HTTP headers for fetch requests**, how the allow‑list mechanism works, and how to configure it. + +## Why Header Allow‑Listing Is Required + +Allowing arbitrary headers in server‑side HTTP requests is dangerous. Without restrictions, users could: + +- Access internal services (SSRF attacks) +- Exfiltrate credentials via forwarded headers +- Abuse Galaxy as a proxy to privileged networks + +To prevent this, Galaxy implements **explicit header allow‑listing with URL pattern matching**: + +- **No headers are allowed by default** +- Each allowed header must be explicitly configured +- Headers are only sent to URLs that match defined patterns +- Sensitive headers can be stored securely using Galaxy’s Vault + +## Configuration Overview + +Header forwarding for fetch requests is controlled via a dedicated configuration file: + +```yaml +galaxy: + url_headers_config_file: url_headers_conf.yml +``` + +This file defines: + +- Which **HTTP headers** are allowed +- For which **URL patterns** they may be sent +- Whether headers are **sensitive** (stored encrypted in the Vault) + +If this configuration file is **not set, missing, or empty**, **no headers will ever be forwarded**. + +## url_headers_conf.yml Format + +The configuration file is a YAML mapping whose top-level `patterns` key holds a list of rules (the snippets below show entries of that list). Each rule pairs one `url_pattern` regular expression with the headers allowed for URLs it matches. 
+ +### Basic Structure + +```yaml +- url_pattern: "https://api\\.example\\.org/.*" + headers: + - name: Authorization + sensitive: true + - name: X-API-Key + sensitive: true +``` + +### Fields + +| Field | Description | +| --------------------- | -------------------------------------------------------- | +| `url_pattern` | A regular expression matched from the start of the URL | +| `headers` | List of allowed HTTP headers for matching URLs | +| `headers[].name` | HTTP header name (compared case‑insensitively) | +| `headers[].sensitive` | Whether the header value is stored securely in the Vault | + +## Sensitive vs Non‑Sensitive Headers + +### Sensitive Headers + +Sensitive headers (for example `Authorization`, `X-API-Key`, `Cookie`) are: + +- **Encrypted and stored in the Galaxy Vault** +- Never logged or exposed in plaintext +- Managed through Galaxy’s secure secrets infrastructure + +Example: + +```yaml +- url_pattern: "https://protected\\.example\\.com/.*" + headers: + - name: Authorization + sensitive: true +``` + +### Non‑Sensitive Headers + +Non‑sensitive headers may be stored in plain configuration and are typically used for: + +- Feature flags +- API versioning +- Public metadata headers + +Example: + +```yaml +- url_pattern: "https://public\\.example\\.com/.*" + headers: + - name: X-Client-Version + sensitive: false +``` + +## Multiple Rules and URL Matching + +Multiple rules may be defined. Every rule whose `url_pattern` matches the request URL applies: the allowed headers are the union across all matching rules, and a header is treated as sensitive if any matching rule marks it `sensitive: true`. + +```yaml +- url_pattern: "https://api\\.github\\.com/.*" + headers: + - name: Authorization + sensitive: true + +- url_pattern: "https://raw\\.githubusercontent\\.com/.*" + headers: + - name: X-Client-Version + sensitive: false +``` + +```{note} +Rule order does not matter: every matching rule contributes its headers. Be careful with overly broad patterns such as `.*`, which grant their headers to every URL. +``` + +## Using Headers in Practice + +Once configured, users (or tools) may provide header values when performing fetch operations. Galaxy will: + +1. Validate the target URL against the allow‑list +2. 
Filter headers to the allowed set +3. Securely inject sensitive headers at request time + +Headers not explicitly allowed **will be silently dropped**. + +## Security Best Practices + +```{warning} +Only allow headers and URL patterns that are strictly necessary. +``` + +Recommended practices: + +- Prefer **narrow URL patterns** over wildcards +- Mark authentication headers as `sensitive: true` +- Avoid allowing `Cookie` headers unless absolutely required +- Never allow headers for internal or private network ranges + +## Troubleshooting + +If headers are not being forwarded as expected: + +1. Verify `url_headers_config_file` is configured in `galaxy.yml` +2. Confirm the URL matches the configured `url_pattern` (patterns are matched from the start of the URL) +3. Check that the header name matches an allowed `headers[].name` (the comparison is case‑insensitive) +4. Ensure Galaxy has access to the configured Vault diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 8f922c5526ce..91bca4990248 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -5638,6 +5638,26 @@ :Type: str +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``url_headers_config_file`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Configuration file for URL request headers allow-list with URL + pattern matching. This file defines which HTTP headers are allowed + in URL fetch requests based on URL patterns, and whether they + should be treated as sensitive (encrypted in the vault) or not. If + no allow-list is specified, no headers will be allowed in URL + requests. This provides fine-grained security control over what + headers can be sent when Galaxy fetches external URLs on behalf of + users, allowing different headers for different target domains or + services. + The value of this option will be resolved with respect to + <config_dir>. 
+:Default: ``url_headers_conf.yml`` +:Type: str + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``display_builtin_converters`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index 491a492d53e5..385130b374ec 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -18,6 +18,7 @@ Galaxy Deployment & Administration jobs job_metrics authentication + enable_headers_in_fetch_requests tool_panel data_tables mq diff --git a/lib/galaxy/app_unittest_utils/galaxy_mock.py b/lib/galaxy/app_unittest_utils/galaxy_mock.py index 1d5a26565c9d..82a0b7886655 100644 --- a/lib/galaxy/app_unittest_utils/galaxy_mock.py +++ b/lib/galaxy/app_unittest_utils/galaxy_mock.py @@ -291,6 +291,7 @@ def __init__(self, **kwargs): self.monitor_thread_join_timeout = 1 self.integrated_tool_panel_config = None self.vault_config_file = kwargs.get("vault_config_file") + self.url_headers_config_file = None self.max_discovered_files = 10000 self.display_builtin_converters = True self.enable_notification_system = True diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index df7c57d9a77e..a6b611ca089d 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -3048,6 +3048,18 @@ galaxy: # . #vault_config_file: vault_conf.yml + # Configuration file for URL request headers allow-list with URL + # pattern matching. This file defines which HTTP headers are allowed + # in URL fetch requests based on URL patterns, and whether they should + # be treated as sensitive (encrypted in the vault) or not. If no + # allow-list is specified, no headers will be allowed in URL requests. + # This provides fine-grained security control over what headers can be + # sent when Galaxy fetches external URLs on behalf of users, allowing + # different headers for different target domains or services. + # The value of this option will be resolved with respect to + # . 
+ #url_headers_config_file: url_headers_conf.yml + # Display built-in converters in the tool panel. #display_builtin_converters: true diff --git a/lib/galaxy/config/sample/url_headers_conf.yml.sample b/lib/galaxy/config/sample/url_headers_conf.yml.sample new file mode 100644 index 000000000000..fb0eb7d8678a --- /dev/null +++ b/lib/galaxy/config/sample/url_headers_conf.yml.sample @@ -0,0 +1,115 @@ +# Allowed URL Headers Configuration +# +# This file defines which HTTP headers are allowed in URL fetch requests based +# on URL patterns, and whether they should be treated as sensitive (encrypted +# in the vault) or not. +# +# If no allow-list is specified or this file is empty/missing, NO headers will +# be allowed in URL requests. +# +# Configuration structure: +# patterns: +# - url_pattern: A regular expression pattern to match URLs +# headers: +# - name: The exact header name (case-insensitive) +# sensitive: Whether this header contains sensitive information that should +# be encrypted when stored in the database (requires vault configuration) +# +# IMPORTANT: +# ------------------------------------ +# When a URL matches MULTIPLE patterns, the union of all allowed headers is used. +# This means you can compose permissions from multiple patterns for flexibility. +# +# Example: A URL matching both pattern A (allows headers X, Y) and pattern B +# (allows headers Y, Z) will allow headers X, Y, and Z. +# +# Security: If ANY matching pattern marks a header as sensitive, it will be +# treated as sensitive (secure-by-default). +# +# The following examples are for illustration purposes only; please use only the minimum configuration for your needs. 
+# Examples: + +patterns: + # GitHub API access - allow authentication headers for GitHub URLs + - url_pattern: "^https://api\\.github\\.com/.*" + headers: + - name: Authorization + sensitive: true + - name: Accept + sensitive: false + - name: X-GitHub-Api-Version + sensitive: false + + # Generic GitHub content (raw files, releases) - no auth needed + - url_pattern: "^https://(raw\\.githubusercontent\\.com|github\\.com/.*/releases/download)/.*" + headers: + - name: Accept + sensitive: false + - name: Accept-Encoding + sensitive: false + + # AWS S3 buckets - allow AWS authentication headers + - url_pattern: "^https://.*\\.s3\\..+\\.amazonaws\\.com/.*" + headers: + - name: Authorization + sensitive: true + - name: X-Amz-Date + sensitive: false + - name: X-Amz-Content-Sha256 + sensitive: false + - name: X-Amz-Security-Token + sensitive: true + + # Generic cloud storage APIs + - url_pattern: "^https://.*\\.(googleapis\\.com|azure\\.com|digitaloceanspaces\\.com)/.*" + headers: + - name: Authorization + sensitive: true + - name: X-API-Key + sensitive: true + - name: Accept + sensitive: false + + # FTP mirrors served over HTTP(S) - note this also matches http://, where Authorization travels in cleartext + - url_pattern: "^https?://ftp\\..*/.*" + headers: + - name: Authorization + sensitive: true + - name: Accept + sensitive: false + + # Academic/research data repositories + - url_pattern: "^https://.*(zenodo\\.org|figshare\\.com|dryad\\.org|dataverse\\.org)/.*" + headers: + - name: Authorization + sensitive: true + - name: X-API-Key + sensitive: true + - name: Accept + sensitive: false + + # Catch-all for ANY HTTPS URL - matching patterns are unioned, so these headers (including Authorization) are allowed for every HTTPS URL; remove or narrow this in production + - url_pattern: "^https://.*" + headers: + - name: Authorization + sensitive: true + - name: X-Auth-Token + sensitive: true + - name: X-API-Key + sensitive: true + - name: Accept + sensitive: false + - name: Accept-Language + sensitive: false + - name: Accept-Encoding + sensitive: false + - name: Cache-Control + sensitive: false + +# Security notes: +# - All matching patterns contribute their 
allowed headers (union of permissions) +# - If ANY pattern marks a header as sensitive, it's treated as sensitive +# - Only add headers that are absolutely necessary for your use case +# - When in doubt, mark headers as sensitive to ensure encryption +# - Patterns are order-independent, making configuration more composable +# - HTTP (non-HTTPS) URLs are generally not recommended and may be blocked diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 8757133292f1..20631d6459c8 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -4162,6 +4162,20 @@ mapping: desc: | Vault config file. + url_headers_config_file: + type: str + default: url_headers_conf.yml + path_resolves_to: config_dir + required: false + desc: | + Configuration file for URL request headers allow-list with URL pattern matching. + This file defines which HTTP headers are allowed in URL fetch requests based + on URL patterns, and whether they should be treated as sensitive (encrypted + in the vault) or not. If no allow-list is specified, no headers will be + allowed in URL requests. This provides fine-grained security control over + what headers can be sent when Galaxy fetches external URLs on behalf of users, + allowing different headers for different target domains or services. 
+ display_builtin_converters: type: bool default: true diff --git a/lib/galaxy/config/url_headers.py b/lib/galaxy/config/url_headers.py new file mode 100644 index 000000000000..908999f4abfc --- /dev/null +++ b/lib/galaxy/config/url_headers.py @@ -0,0 +1,371 @@ +import abc +import logging +import re +from typing import Optional + +import yaml +from pydantic import ( + BaseModel, + field_validator, +) + +from galaxy.config import GalaxyAppConfiguration +from galaxy.exceptions import ConfigDoesNotAllowException + +log = logging.getLogger(__name__) + + +class UrlHeadersConfigurationException(Exception): + """Raised when there is an error in the URL headers configuration.""" + + pass + + +class HeaderConfig(BaseModel): + """Configuration for a single header.""" + + name: str + sensitive: bool = False + + @field_validator("name") + @classmethod + def name_must_not_be_empty(cls, v: str) -> str: + if not v or not v.strip(): + raise ValueError("Header name cannot be empty") + return v.strip() + + +class UrlPatternConfig(BaseModel): + """Configuration for a URL pattern and its allowed headers.""" + + url_pattern: str + headers: list[HeaderConfig] = [] + + @field_validator("url_pattern") + @classmethod + def pattern_must_not_be_empty(cls, v: str) -> str: + if not v or not v.strip(): + raise ValueError("URL pattern cannot be empty") + return v.strip() + + +class UrlHeadersConfig(abc.ABC): + """ + Manages the configuration for allowed URL request headers with pattern matching. + """ + + @abc.abstractmethod + def is_header_allowed_for_url(self, header_name: str, url: str) -> bool: + pass + + @abc.abstractmethod + def is_header_sensitive_for_url(self, header_name: str, url: str) -> bool: + pass + + @abc.abstractmethod + def find_all_matching(self, url: str) -> list[UrlPatternConfig]: + pass + + +class NullUrlHeadersConfiguration(UrlHeadersConfig): + """ + Default configuration when there is no real configuration for allowing URL request headers. 
+ + This configuration raises exceptions when any method is called, providing fail-fast + behavior to clearly indicate that headers require configuration. + """ + + _ERROR_MESSAGE = ( + "No URL headers configuration is available. " + "Headers require explicit configuration to be allowed. " + "Please contact your Galaxy administrator to configure URL headers." + ) + + def is_header_allowed_for_url(self, header_name: str, url: str) -> bool: + """ + Raises an exception - headers require configuration. + + Raises: + ConfigDoesNotAllowException: Always raised to indicate missing configuration + """ + raise ConfigDoesNotAllowException(f"Cannot check if header '{header_name}' is allowed: {self._ERROR_MESSAGE}") + + def is_header_sensitive_for_url(self, header_name: str, url: str) -> bool: + """ + Raises an exception - headers require configuration. + + Raises: + ConfigDoesNotAllowException: Always raised to indicate missing configuration + """ + raise ConfigDoesNotAllowException(f"Cannot check if header '{header_name}' is sensitive: {self._ERROR_MESSAGE}") + + def find_all_matching(self, url: str) -> list[UrlPatternConfig]: + """ + Raises an exception - no patterns are configured. 
+ + Raises: + ConfigDoesNotAllowException: Always raised to indicate missing configuration + """ + raise ConfigDoesNotAllowException(f"Cannot find matching patterns for URL '{url}': {self._ERROR_MESSAGE}") + + +class UrlHeadersConfiguration(UrlHeadersConfig): + """Contains valid configuration to allow URL request headers.""" + + def __init__(self): + self._patterns: list[UrlPatternConfig] = [] + self._compiled_patterns: list[tuple[re.Pattern[str], UrlPatternConfig]] = [] + + def _add_pattern(self, pattern_config: UrlPatternConfig) -> None: + self._patterns.append(pattern_config) + try: + compiled_pattern = re.compile(pattern_config.url_pattern) + self._compiled_patterns.append((compiled_pattern, pattern_config)) + except re.error as e: + raise UrlHeadersConfigurationException( + f"Invalid regex pattern '{pattern_config.url_pattern}' in URL headers configuration: {e}" + ) from e + + def find_all_matching(self, url: str) -> list[UrlPatternConfig]: + """ + Find all URL patterns that match the given URL. + + Args: + url: The URL to match against patterns + + Returns: + List of all matching UrlPatternConfig objects (may be empty) + """ + matching_patterns = [] + for compiled_pattern, pattern_config in self._compiled_patterns: + if compiled_pattern.match(url): + matching_patterns.append(pattern_config) + return matching_patterns + + def _find_header_in_patterns( + self, header_name: str, matching_patterns: list[UrlPatternConfig] + ) -> Optional[tuple[HeaderConfig, UrlPatternConfig]]: + """ + Find a header configuration in matching patterns. 
+ + Args: + header_name: The header name to find (case-insensitive) + matching_patterns: List of patterns to search + + Returns: + Tuple of (HeaderConfig, UrlPatternConfig) for first match, or None if not found + """ + header_name_lower = header_name.lower() + for pattern_config in matching_patterns: + for header_config in pattern_config.headers: + if header_config.name.lower() == header_name_lower: + return (header_config, pattern_config) + return None + + def is_header_allowed_for_url(self, header_name: str, url: str) -> bool: + """ + Check if a header is allowed for a specific URL. + + If multiple patterns match the URL, the header is allowed if it appears + in ANY of the matching patterns. + + Args: + header_name: The header name to check (case-insensitive) + url: The target URL + + Returns: + True if the header is allowed for this URL, False otherwise + """ + matching_patterns = self.find_all_matching(url) + if not matching_patterns: + return False + + result = self._find_header_in_patterns(header_name, matching_patterns) + return result is not None + + def is_header_sensitive_for_url(self, header_name: str, url: str) -> bool: + """ + Check if a header should be treated as sensitive for a specific URL. + + If multiple patterns match the URL and define the same header, the header + is treated as sensitive if ANY matching pattern marks it as sensitive + (secure by default). 
+ + Args: + header_name: The header name to check (case-insensitive) + url: The target URL + + Returns: + True if the header is allowed and marked as sensitive in any matching pattern, + False otherwise + """ + matching_patterns = self.find_all_matching(url) + if not matching_patterns: + return False + + header_name_lower = header_name.lower() + for pattern_config in matching_patterns: + for header_config in pattern_config.headers: + if header_config.name.lower() == header_name_lower: + if header_config.sensitive: + return True + return False + + +class UrlHeadersConfigFactory: + """Factory for creating UrlHeadersConfig instances.""" + + @staticmethod + def _load_patterns_from_file(config_file: str) -> list[UrlPatternConfig]: + """Load pattern configurations from a YAML file. + + Args: + config_file: Path to the YAML configuration file + + Returns: + List of UrlPatternConfig objects + + Raises: + UrlHeadersConfigurationException: If file cannot be read or parsed + """ + try: + with open(config_file) as f: + config_data = yaml.safe_load(f) + except FileNotFoundError as e: + raise UrlHeadersConfigurationException( + f"URL headers configuration file not found: {config_file}. " + "Please check the 'url_headers_config_file' setting in your Galaxy configuration." + ) from e + except yaml.YAMLError as e: + raise UrlHeadersConfigurationException( + f"Failed to parse URL headers configuration file {config_file}: {e}. Please check the YAML syntax." 
+ ) from e + except Exception as e: + raise UrlHeadersConfigurationException( + f"Failed to read URL headers configuration file {config_file}: {e}" + ) from e + + if not config_data: + return [] + + patterns_data = config_data.get("patterns", []) + if not patterns_data: + return [] + + patterns = [] + for i, pattern_data in enumerate(patterns_data): + try: + pattern_config = UrlPatternConfig(**pattern_data) + patterns.append(pattern_config) + except Exception as e: + raise UrlHeadersConfigurationException( + f"Invalid pattern configuration at index {i} in {config_file}: {e}. " + "Each pattern must have 'url_pattern' (valid regex) and 'headers' (list of header configs)." + ) from e + + return patterns + + @staticmethod + def from_app_config(app_config: GalaxyAppConfiguration) -> UrlHeadersConfig: + """ + Create a UrlHeadersConfig from Galaxy app configuration. + + Args: + app_config: Galaxy application configuration + + Returns: + UrlHeadersConfiguration if config file is specified and exists, + NullUrlHeadersConfiguration otherwise + + Raises: + UrlHeadersConfigurationException: If config file exists but is invalid + """ + config_file = app_config.url_headers_config_file + if not config_file: + return NullUrlHeadersConfiguration() + + return UrlHeadersConfigFactory.from_file(config_file) + + @staticmethod + def from_file(config_file: str) -> UrlHeadersConfig: + """ + Create a UrlHeadersConfig from a YAML file. + + If the file doesn't exist, returns NullUrlHeadersConfiguration for backwards compatibility. 
+ + Args: + config_file: Path to the YAML configuration file + + Returns: + UrlHeadersConfiguration with patterns loaded from the file, or + NullUrlHeadersConfiguration if file doesn't exist + + Raises: + UrlHeadersConfigurationException: If file exists but contains errors + """ + try: + config = UrlHeadersConfiguration() + patterns = UrlHeadersConfigFactory._load_patterns_from_file(config_file) + + for pattern in patterns: + config._add_pattern(pattern) + + log.info(f"Loaded {len(config._patterns)} URL patterns from {config_file}") + return config + except UrlHeadersConfigurationException as e: + # If the file doesn't exist, return null config for backwards compatibility + if "not found" in str(e): + log.warning(f"URL headers configuration file not found: {config_file}. Using null configuration.") + return NullUrlHeadersConfiguration() + # For any other configuration error, re-raise + raise + + @staticmethod + def from_dict(config_dict: dict) -> UrlHeadersConfig: + """ + Create a UrlHeadersConfig from a dictionary (useful for testing). 
+ + Args: + config_dict: Dictionary containing URL headers configuration with 'patterns' key + + Returns: + UrlHeadersConfiguration with patterns loaded from the dictionary + + Raises: + UrlHeadersConfigurationException: If configuration is invalid + + Example: + config_dict = { + "patterns": [ + { + "url_pattern": "^https://api\\.github\\.com/.*", + "headers": [ + {"name": "Authorization", "sensitive": True}, + {"name": "Accept", "sensitive": False} + ] + } + ] + } + config = UrlHeadersConfigFactory.from_dict(config_dict) + """ + config = UrlHeadersConfiguration() + + if not config_dict: + return config + + patterns_data = config_dict.get("patterns", []) + if not patterns_data: + return config + + for i, pattern_data in enumerate(patterns_data): + try: + pattern_config = UrlPatternConfig(**pattern_data) + config._add_pattern(pattern_config) + except Exception as e: + raise UrlHeadersConfigurationException( + f"Invalid pattern configuration at index {i}: {e}. " + "Each pattern must have 'url_pattern' (valid regex) and 'headers' (list of header configs)." + ) from e + + log.info(f"Loaded {len(config._patterns)} URL patterns from dictionary") + return config diff --git a/lib/galaxy/managers/headers_encryption.py b/lib/galaxy/managers/headers_encryption.py new file mode 100644 index 000000000000..2bff5233bb1c --- /dev/null +++ b/lib/galaxy/managers/headers_encryption.py @@ -0,0 +1,364 @@ +""" +Utilities for encrypting sensitive headers using Galaxy's Vault system. + +This module provides functionality to: +1. Identify sensitive headers that should be encrypted based on configuration +2. Encrypt/decrypt headers using the vault +3. Transform data structures to use vault references for sensitive headers + +This can be used for any scenario where HTTP headers containing sensitive information +(like authorization tokens, API keys, etc.) need to be stored securely in the database. +Header sensitivity is determined by the URL headers configuration file. 
+""" + +import logging +from typing import ( + Any, + Optional, +) + +from galaxy.config.url_headers import UrlHeadersConfig +from galaxy.exceptions import ( + ConfigDoesNotAllowException, + RequestParameterMissingException, +) +from galaxy.security.vault import Vault + +# Default vault key prefix for headers +DEFAULT_VAULT_KEY_PREFIX = "headers" + +log = logging.getLogger(__name__) + + +def is_sensitive_header( + header_name: str, url_headers_config: Optional[UrlHeadersConfig] = None, url: Optional[str] = None +) -> bool: + """ + Check if a header contains sensitive information and should be encrypted. + + Args: + header_name: The header name to check + url_headers_config: URL headers configuration. This is required to determine + sensitivity as we no longer use hardcoded patterns. + url: Optional target URL for URL-specific header validation + + Returns: + True if the header should be encrypted, False otherwise + """ + if url_headers_config: + if url: + return url_headers_config.is_header_sensitive_for_url(header_name, url) + else: + # No URL provided - cannot perform URL-specific sensitivity checking + # In our pattern-based system, headers without URLs cannot be properly validated + # Default to not sensitive (individual header checking should not fail fast) + log.debug(f"No URL provided for sensitivity check of header '{header_name}' - defaulting to not sensitive") + return False + + # No configuration provided - default to not sensitive for security + # (better to not encrypt than to encrypt everything without knowing) + log.debug( + f"No URL headers configuration provided for sensitivity check of header '{header_name}' - defaulting to not sensitive" + ) + return False + + +def has_sensitive_headers( + data: dict, url_headers_config: Optional[UrlHeadersConfig] = None, url: Optional[str] = None +) -> bool: + """ + Check if the data structure contains any sensitive headers that would require encryption. 
+ + This function recursively searches through a dictionary structure to detect + if any sensitive headers are present that would need to be encrypted. + + IMPORTANT: This function fails fast if headers are found but no configuration + is provided. + + Args: + data: The data dictionary structure to check for sensitive headers + url_headers_config: URL headers configuration for sensitivity checks (required if headers present) + url: Optional target URL for URL-specific header validation + + Returns: + True if sensitive headers are found, False otherwise + + Raises: + ConfigDoesNotAllowException: If headers are present but no configuration is provided + """ + if not url_headers_config: + # Without configuration, headers are not allowed at all + # Fail fast if any headers are found anywhere in the data structure + def check_for_headers(obj): + if isinstance(obj, dict): + for key, value in obj.items(): + if key == "headers" and isinstance(value, dict) and value: + header_names = list(value.keys()) + raise ConfigDoesNotAllowException( + "Headers are not allowed without proper URL headers configuration. " + f"Found headers: {header_names}. " + "If you need to use headers, please contact your Galaxy administrator to whitelist them." + ) + elif isinstance(value, (dict, list)): + check_for_headers(value) + elif isinstance(obj, list): + for item in obj: + if isinstance(item, (dict, list)): + check_for_headers(item) + + check_for_headers(data) + return False + + # Configuration exists - check for sensitive headers recursively + def check_sensitivity(obj, inherited_url=None): + if isinstance(obj, dict): + for key, value in obj.items(): + if key == "headers" and isinstance(value, dict) and value: + element_url = obj.get("url") if "url" in obj else inherited_url + + if not element_url: + header_names = list(value.keys()) + raise RequestParameterMissingException( + f"URL is required for header validation in pattern-based configuration. " + f"Found headers: {header_names}. 
" + f"Cannot validate headers without knowing the target URL." + ) + + for header_name in value.keys(): + if is_sensitive_header(header_name, url_headers_config, element_url): + return True + elif isinstance(value, (dict, list)): + if check_sensitivity(value, inherited_url): + return True + elif isinstance(obj, list): + for item in obj: + if isinstance(item, (dict, list)): + if check_sensitivity(item, inherited_url): + return True + return False + + return check_sensitivity(data, url) + + +def create_vault_key(context_id: str, header_name: str, key_prefix: Optional[str] = None) -> str: + """ + Create a vault key for storing a header value. + + Args: + context_id: Unique identifier for the context (e.g., UUID of a request, session ID, etc.) + header_name: Name of the header (will be normalized to lowercase) + key_prefix: Optional custom prefix for the vault key. Defaults to DEFAULT_VAULT_KEY_PREFIX + + Returns: + Vault key path for the header + """ + if key_prefix is None: + key_prefix = DEFAULT_VAULT_KEY_PREFIX + normalized_header = header_name.lower().replace("-", "_") + return f"{key_prefix}/{context_id}/{normalized_header}" + + +def create_vault_reference(header_name: str, reference_prefix: str = "VAULT_HEADER") -> str: + """ + Create a vault reference placeholder that will be stored in data structures. + + Args: + header_name: Name of the header + reference_prefix: Prefix for the vault reference. Defaults to "VAULT_HEADER" + + Returns: + Vault reference string that indicates this value should be decrypted + """ + return f"__{reference_prefix}_{header_name.upper().replace('-', '_')}__" + + +def encrypt_headers_in_data( + data: dict, + context_id: str, + vault: Vault, + key_prefix: Optional[str] = None, + reference_prefix: str = "VAULT_HEADER", + url_headers_config: Optional[UrlHeadersConfig] = None, +) -> dict: + """ + Recursively process data structure to encrypt sensitive headers. + + This function walks through a dictionary structure and: + 1. 
Identifies elements with headers + 2. Encrypts sensitive headers to the vault + 3. Replaces sensitive header values with vault references + + Args: + data: The data dictionary structure containing headers + context_id: Unique identifier for the context (e.g., UUID, session ID, etc.) + vault: Vault instance for encryption + key_prefix: Optional custom prefix for vault keys. Defaults to DEFAULT_VAULT_KEY_PREFIX + reference_prefix: Prefix for vault references. Defaults to "VAULT_HEADER" + url_headers_config: Optional URL headers configuration for sensitivity checks + + Returns: + Modified data structure with sensitive headers encrypted + + Raises: + ConfigDoesNotAllowException: If headers are present but proper configuration or URL is not provided + """ + # Validate headers before processing - this will fail fast if headers are found + # but configuration or URLs are missing + has_sensitive_headers(data, url_headers_config) + + # Make a deep copy to avoid modifying the original + encrypted_data: dict[str, Any] = {} + + for key, value in data.items(): + if key == "headers" and isinstance(value, dict): + # Look for a URL at the same level as the headers (e.g., in UrlDataElement) + element_url = data.get("url") if "url" in data else None + encrypted_data[key] = _encrypt_headers_dict( + value, context_id, vault, key_prefix, reference_prefix, url_headers_config, element_url + ) + elif isinstance(value, dict): + encrypted_data[key] = encrypt_headers_in_data( + value, context_id, vault, key_prefix, reference_prefix, url_headers_config + ) + elif isinstance(value, list): + encrypted_data[key] = [ + ( + encrypt_headers_in_data(item, context_id, vault, key_prefix, reference_prefix, url_headers_config) + if isinstance(item, dict) + else item + ) + for item in value + ] + else: + encrypted_data[key] = value + + return encrypted_data + + +def decrypt_headers_in_data( + data: dict, + context_id: str, + vault: Vault, + key_prefix: Optional[str] = None, + reference_prefix: str = 
"VAULT_HEADER", + url_headers_config: Optional[UrlHeadersConfig] = None, +) -> dict: + """ + Recursively process data structure to decrypt sensitive headers from vault. + + This function walks through a dictionary structure and: + 1. Identifies vault references in headers + 2. Decrypts the actual header values from the vault + 3. Replaces vault references with actual header values + + Args: + data: The data dictionary structure with vault references + context_id: Unique identifier for the context (e.g., UUID, session ID, etc.) + vault: Vault instance for decryption + key_prefix: Optional custom prefix for vault keys. Defaults to DEFAULT_VAULT_KEY_PREFIX + reference_prefix: Prefix for vault references. Defaults to "VAULT_HEADER" + url_headers_config: Optional URL headers configuration for sensitivity checks + + Returns: + Modified data structure with actual header values restored + """ + # Make a deep copy to avoid modifying the original + decrypted_data: dict[str, Any] = {} + + for key, value in data.items(): + if key == "headers" and isinstance(value, dict): + decrypted_data[key] = _decrypt_headers_dict(value, context_id, vault, key_prefix, reference_prefix) + elif isinstance(value, dict): + decrypted_data[key] = decrypt_headers_in_data( + value, context_id, vault, key_prefix, reference_prefix, url_headers_config + ) + elif isinstance(value, list): + decrypted_data[key] = [ + ( + decrypt_headers_in_data(item, context_id, vault, key_prefix, reference_prefix, url_headers_config) + if isinstance(item, dict) + else item + ) + for item in value + ] + else: + decrypted_data[key] = value + + return decrypted_data + + +def _encrypt_headers_dict( + headers: dict[str, str], + context_id: str, + vault: Vault, + key_prefix: Optional[str] = None, + reference_prefix: str = "VAULT_HEADER", + url_headers_config: Optional[UrlHeadersConfig] = None, + url: Optional[str] = None, +) -> dict[str, str]: + """ + Encrypt sensitive headers in a headers dictionary. 
+ + Args: + headers: Dictionary of header name -> header value + context_id: Unique identifier for the context + vault: Vault instance for encryption + key_prefix: Optional custom prefix for vault keys + reference_prefix: Prefix for vault references + url_headers_config: Optional URL headers configuration for sensitivity checks + url: Optional target URL for URL-specific header validation + + Returns: + Dictionary with sensitive headers replaced by vault references + """ + encrypted_headers = {} + for header_name, header_value in headers.items(): + if is_sensitive_header(header_name, url_headers_config, url): + # Encrypt sensitive header + vault_key = create_vault_key(context_id, header_name, key_prefix) + vault.write_secret(vault_key, header_value) + encrypted_headers[header_name] = create_vault_reference(header_name, reference_prefix) + else: + # Keep non-sensitive headers as-is + encrypted_headers[header_name] = header_value + return encrypted_headers + + +def _decrypt_headers_dict( + headers: dict[str, str], + context_id: str, + vault: Vault, + key_prefix: Optional[str] = None, + reference_prefix: str = "VAULT_HEADER", +) -> dict[str, str]: + """ + Decrypt vault references in a headers dictionary. 
+ + Args: + headers: Dictionary of header name -> header value (may contain vault references) + context_id: Unique identifier for the context + vault: Vault instance for decryption + key_prefix: Optional custom prefix for vault keys + reference_prefix: Prefix for vault references + + Returns: + Dictionary with vault references replaced by actual header values + """ + decrypted_headers = {} + for header_name, header_value in headers.items(): + if isinstance(header_value, str) and header_value.startswith(f"__{reference_prefix}_"): + # Decrypt vault reference + vault_key = create_vault_key(context_id, header_name, key_prefix) + decrypted_value = vault.read_secret(vault_key) + if decrypted_value is not None: + decrypted_headers[header_name] = decrypted_value + else: + # Handle case where vault key doesn't exist (shouldn't happen in normal flow) + log.warning( + f"Vault key not found for header '{header_name}' with key '{vault_key}'. Keeping vault reference as fallback." + ) + decrypted_headers[header_name] = header_value # Keep vault reference as fallback + else: + # Keep non-vault headers as-is + decrypted_headers[header_name] = header_value + return decrypted_headers diff --git a/lib/galaxy/managers/landing.py b/lib/galaxy/managers/landing.py index 38d085edab81..24a7c98a6cf3 100644 --- a/lib/galaxy/managers/landing.py +++ b/lib/galaxy/managers/landing.py @@ -1,3 +1,4 @@ +import logging from typing import ( Optional, Union, @@ -10,6 +11,8 @@ ) from sqlalchemy import select +from galaxy.config import GalaxyAppConfiguration +from galaxy.config.url_headers import UrlHeadersConfigFactory from galaxy.exceptions import ( InsufficientPermissionsException, ItemAlreadyClaimedException, @@ -37,6 +40,10 @@ WorkflowLandingRequest, ) from galaxy.security.idencoding import IdEncodingHelper +from galaxy.security.vault import ( + InvalidVaultConfigException, + Vault, +) from galaxy.structured_app import ( MinimalManagerApp, StructuredApp, @@ -52,6 +59,11 @@ ) from galaxy.util import 
safe_str_cmp from .context import ProvidesUserContext +from .headers_encryption import ( + decrypt_headers_in_data, + encrypt_headers_in_data, + has_sensitive_headers, +) from .tools import ( get_tool_from_toolbox, ToolRunReference, @@ -59,6 +71,10 @@ LandingRequestModel = Union[ToolLandingRequestModel, WorkflowLandingRequestModel] +FETCH_TOOL_ID = "__DATA_FETCH__" + +log = logging.getLogger(__name__) + class LandingRequestManager: @@ -68,11 +84,15 @@ def __init__( security: IdEncodingHelper, workflow_contents_manager: WorkflowContentsManager, app: MinimalManagerApp, + config: GalaxyAppConfiguration, + vault: Optional[Vault] = None, ): self.sa_session = sa_session self.security = security self.workflow_contents_manager = workflow_contents_manager self.app = app + self.vault = vault + self.url_headers_config = UrlHeadersConfigFactory.from_app_config(config) def create_tool_landing_request(self, payload: CreateToolLandingRequestPayload, user_id=None) -> ToolLandingRequest: tool_id = payload.tool_id @@ -87,7 +107,7 @@ def create_tool_landing_request(self, payload: CreateToolLandingRequestPayload, if hasattr(tool, "parameters"): internal_landing_request_state = landing_decode(landing_request_state, tool, self.security.decode_id) else: - assert tool.id == "__DATA_FETCH__" + assert tool.id == FETCH_TOOL_ID # we have validated the payload as part of the API request # nothing else to decode ideally so just swap to internal model state object internal_landing_request_state = LandingRequestInternalToolState( @@ -129,13 +149,18 @@ def create_tool_landing_request(self, payload: CreateToolLandingRequestPayload, model = ToolLandingRequestModel() model.tool_id = tool_id model.tool_version = tool_version - model.request_state = internal_landing_request_state.input_state model.uuid = uuid4() model.client_secret = payload.client_secret model.public = payload.public model.origin = str(payload.origin) if payload.origin else None if user_id: model.user_id = user_id + + request_state = 
self._encrypt_headers_in_request_state( + internal_landing_request_state.input_state, str(model.uuid) + ) + model.request_state = request_state + self._save(model) return self._tool_response(model) @@ -154,7 +179,11 @@ def create_workflow_landing_request(self, payload: CreateWorkflowLandingRequestP model.workflow_source = payload.workflow_id model.uuid = uuid4() model.client_secret = payload.client_secret - model.request_state = self.validate_workflow_request_state(payload.request_state) + + validated_request_state = self.validate_workflow_request_state(payload.request_state) + request_state = self._encrypt_headers_in_request_state(validated_request_state, str(model.uuid)) + model.request_state = request_state + model.public = payload.public self._save(model) return self._workflow_response(model) @@ -283,10 +312,12 @@ def _get_claimed_workflow_landing_request( return request def _tool_response(self, model: ToolLandingRequestModel) -> ToolLandingRequest: + request_state = self._decrypt_headers_in_request_state(model.request_state, str(model.uuid)) + response_model = ToolLandingRequest( tool_id=model.tool_id, tool_version=model.tool_version, - request_state=model.request_state, + request_state=request_state, uuid=model.uuid, state=self._state(model), origin=model.origin, @@ -306,10 +337,13 @@ def _workflow_response(self, model: WorkflowLandingRequestModel) -> WorkflowLand target_type = model.workflow_source_type workflow_id = model.workflow_source assert workflow_id + + request_state = self._decrypt_headers_in_request_state(model.request_state, str(model.uuid)) + response_model = WorkflowLandingRequest( workflow_id=self.security.encode_id(workflow_id) if isinstance(workflow_id, int) else workflow_id, workflow_target_type=target_type, - request_state=model.request_state, + request_state=request_state, uuid=model.uuid, state=self._state(model), origin=model.origin, @@ -329,3 +363,31 @@ def _save(self, model: LandingRequestModel): sa_session = self.sa_session 
sa_session.add(model) sa_session.commit() + + def _encrypt_headers_in_request_state(self, request_state: Optional[dict], landing_uuid: str) -> Optional[dict]: + if request_state is not None: + if has_sensitive_headers(request_state, self.url_headers_config): + if not self.vault: + raise InvalidVaultConfigException( + "Sensitive headers detected in landing request but no vault is configured. " + "Configure a vault to securely store sensitive header information." + ) + return encrypt_headers_in_data( + request_state, + landing_uuid, + self.vault, + key_prefix="landing_request/headers", + url_headers_config=self.url_headers_config, + ) + return request_state + + def _decrypt_headers_in_request_state(self, request_state: Optional[dict], landing_uuid: str): + if request_state is not None and self.vault: + return decrypt_headers_in_data( + request_state, + landing_uuid, + self.vault, + key_prefix="landing_request/headers", + url_headers_config=self.url_headers_config, + ) + return request_state diff --git a/lib/galaxy/schema/fetch_data.py b/lib/galaxy/schema/fetch_data.py index 91cfe8348e6e..6cc616bdc1a7 100644 --- a/lib/galaxy/schema/fetch_data.py +++ b/lib/galaxy/schema/fetch_data.py @@ -158,6 +158,7 @@ class PastedDataElement(BaseDataElement): class UrlDataElement(BaseDataElement): src: Literal["url"] url: str = Field(..., description="URL to upload") + headers: Optional[dict[str, str]] = Field(None, description="Optional headers to include in the URL fetch request") class ServerDirElement(BaseDataElement): diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py index 36b70275f402..3d9f01154935 100644 --- a/lib/galaxy/tools/data_fetch.py +++ b/lib/galaxy/tools/data_fetch.py @@ -19,6 +19,10 @@ handle_upload, UploadProblemException, ) +from galaxy.files.models import ( + FilesSourceOptions, + PartialFilesSourceProperties, +) from galaxy.files.uris import ( ensure_file_sources, stream_to_file, @@ -540,8 +544,19 @@ def _has_src_to_path( is_link = True 
return name, path, is_link + headers = item.get("headers") + file_source_options: Optional[FilesSourceOptions] = None + if headers: + extra_props = PartialFilesSourceProperties(**{"http_headers": headers}) + file_source_options = FilesSourceOptions(extra_props=extra_props) + try: - path = stream_url_to_file(url, file_sources=upload_config.file_sources, dir=upload_config.working_directory) + path = stream_url_to_file( + url, + file_sources=upload_config.file_sources, + dir=upload_config.working_directory, + file_source_opts=file_source_options, + ) except Exception as e: raise Exception(f"Failed to fetch url {url}. {str(e)}") diff --git a/lib/galaxy_test/driver/integration_util.py b/lib/galaxy_test/driver/integration_util.py index 4ab96b0a6187..9912ad890577 100644 --- a/lib/galaxy_test/driver/integration_util.py +++ b/lib/galaxy_test/driver/integration_util.py @@ -383,3 +383,15 @@ def _disable_workflow_scheduling(cls, config): """ cls._configure_workflow_schedulers(noop_schedulers_conf, config) + + +class ConfigureAllowedUrlHeaders: + _test_driver: GalaxyTestDriver + + @classmethod + def _configure_allowed_url_headers(cls, allowed_url_headers_conf: str, config): + temp_directory = cls._test_driver.mkdtemp() + url_headers_conf_path = os.path.join(temp_directory, "url_headers_conf.yml") + with open(url_headers_conf_path, "w") as f: + f.write(allowed_url_headers_conf) + config["url_headers_config_file"] = url_headers_conf_path diff --git a/test/integration/test_landing_requests.py b/test/integration/test_landing_requests.py new file mode 100644 index 000000000000..0fd093333a7d --- /dev/null +++ b/test/integration/test_landing_requests.py @@ -0,0 +1,395 @@ +from typing import ( + cast, + Optional, +) + +from sqlalchemy import select + +from galaxy.managers.landing import LandingRequestModel +from galaxy.model import ( + ToolLandingRequest as ToolLandingRequestModel, + WorkflowLandingRequest as WorkflowLandingRequestModel, +) +from galaxy.schema.fetch_data import ( + 
CreateDataLandingPayload, + DataLandingRequestState, +) +from galaxy.schema.schema import ( + CreateWorkflowLandingRequestPayload, + ToolLandingRequest, +) +from galaxy_test.base.populators import ( + DatasetPopulator, + WorkflowPopulator, +) +from galaxy_test.driver import integration_util + +TEST_URL = "base64://eyJ0ZXN0IjogInRlc3QifQ==" # base64 encoded {"test": "test"} + +# URL headers configuration for tests - allows both sensitive and non-sensitive headers +ALLOW_URL_HEADERS_CONF = """ +patterns: + # Match all URLs (including base64://) for testing + - url_pattern: "^.*" + headers: + # Sensitive headers - will be encrypted when vault is configured + - name: Authorization + sensitive: true + - name: X-API-Key + sensitive: true + # Non-sensitive headers + - name: Content-Type + sensitive: false + - name: Accept + sensitive: false + - name: Accept-Language + sensitive: false + - name: Accept-Encoding + sensitive: false + - name: Cache-Control + sensitive: false + - name: X-Custom-Header + sensitive: false +""" + + +class BaseLandingRequestTest(integration_util.IntegrationTestCase, integration_util.ConfigureAllowedUrlHeaders): + """Base class with common setup for landing request tests.""" + + dataset_populator: DatasetPopulator + workflow_populator: WorkflowPopulator + + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + self.workflow_populator = WorkflowPopulator(self.galaxy_interactor) + + def _create_data_item_with_headers(self, headers: dict[str, str], url: str = TEST_URL) -> dict: + """Create a data item with specified headers.""" + return { + "src": "url", + "url": url, + "ext": "txt", + "deferred": False, + "headers": headers, + } + + def _create_data_landing_request_state( + self, headers: dict[str, str], url: str = TEST_URL + ) -> DataLandingRequestState: + """Create a DataLandingRequestState with specified headers.""" + return DataLandingRequestState( + targets=[ + { + "destination": {"type": 
"hdas"}, + "items": [self._create_data_item_with_headers(headers, url)], + } + ], + ) + + def _create_workflow_input_with_headers( + self, headers: dict[str, str], input_name: str = "WorkflowInput1", url: str = TEST_URL + ) -> dict[str, dict]: + """Create a workflow input with specified headers.""" + return { + input_name: { + "src": "url", + "url": url, + "ext": "txt", + "deferred": False, + "headers": headers, + } + } + + def _assert_headers_match(self, actual_headers: dict[str, str], expected_headers: dict[str, str]) -> None: + """Assert that headers match expected values.""" + for key, expected_value in expected_headers.items(): + assert ( + actual_headers[key] == expected_value + ), f"Header {key} mismatch: expected {expected_value}, got {actual_headers.get(key)}" + + def _extract_data_landing_headers(self, tool_landing: ToolLandingRequest) -> dict[str, str]: + """Extract headers from a tool landing response.""" + request_state = tool_landing.request_state + assert request_state, "Request state is None" + request_json = request_state["request_json"] + assert request_json, "Request JSON is None" + targets = request_json["targets"] + assert targets and len(targets) == 1, "Expected exactly one target" + target = targets[0] + assert "elements" in target and target["elements"], "No elements found in target" + assert len(target["elements"]) == 1, "Expected exactly one element" + element = target["elements"][0] + assert "headers" in element, "No headers found in element" + return element["headers"] + + def _verify_headers_encrypted_in_db( + self, landing_request_uuid: str, expect_not_to_find: list[str], model_class: type[LandingRequestModel] + ): + """Verify that sensitive headers are stored encrypted in the database.""" + landing_request = self._get_landing_request_from_db(landing_request_uuid, model_class) + assert ( + landing_request is not None + ), f"{model_class.__name__} with UUID {landing_request_uuid} not found in database" + request_state_json = 
landing_request.request_state + assert request_state_json is not None, "Request state is None in database" + request_state_json_str = str(request_state_json) + + for header_value in expect_not_to_find: + assert header_value not in request_state_json_str, f"Sensitive header {header_value} found in plain text" + + def _get_landing_request_from_db( + self, uuid: str, model_class: type[LandingRequestModel] + ) -> Optional[LandingRequestModel]: + """Get a landing request from the database by UUID.""" + session = self._app.model.session + stmt = select(model_class).where(model_class.uuid == uuid) + return cast(Optional[LandingRequestModel], session.execute(stmt).scalar_one_or_none()) + + def _create_and_make_public_workflow(self, workflow_name: str) -> str: + """Create a simple workflow and make it public.""" + workflow_id = self.workflow_populator.simple_workflow(workflow_name) + self.workflow_populator.make_public(workflow_id) + return workflow_id + + +class TestLandingRequestsIntegration(BaseLandingRequestTest, integration_util.ConfiguresDatabaseVault): + @classmethod + def handle_galaxy_config_kwds(cls, config): + super().handle_galaxy_config_kwds(config) + cls._configure_database_vault(config) + cls._configure_allowed_url_headers(ALLOW_URL_HEADERS_CONF, config) + + def test_data_landing_with_encrypted_headers(self): + """Test that sensitive headers are encrypted in the vault when stored in landing requests. + + This test verifies that headers containing sensitive information like authorization tokens + are encrypted using Galaxy's vault system instead of being stored in plain text in the + database. Headers are automatically detected and encrypted based on their names. 
+ """ + # Create test headers with both sensitive and non-sensitive values + headers = { + "Authorization": "Bearer data-test-token-should-be-encrypted", + "X-API-Key": "data-test-api-key-123456", + "Accept": "application/json", + "Content-Type": "application/json", + "X-Custom-Header": "custom-value", + } + + # Create and execute data landing request + request_state = self._create_data_landing_request_state(headers) + payload = CreateDataLandingPayload(request_state=request_state, public=True) + response = self.dataset_populator.create_data_landing(payload) + assert response.tool_id == "__DATA_FETCH__" + + # Verify headers are preserved after decryption + tool_landing = self.dataset_populator.use_tool_landing(response.uuid) + actual_headers = self._extract_data_landing_headers(tool_landing) + self._assert_headers_match(actual_headers, headers) + + # Verify that sensitive headers are stored encrypted in the database + sensitive_values = ["Bearer data-test-token-should-be-encrypted", "data-test-api-key-123456"] + self._verify_headers_encrypted_in_db(str(response.uuid), sensitive_values, ToolLandingRequestModel) + + def test_workflow_landing_with_encrypted_headers(self): + """Test that sensitive headers are encrypted in workflow landing requests. + + This test verifies that headers containing sensitive information like authorization tokens + are encrypted using Galaxy's vault system when workflow landing requests contain URL fetch + steps with headers. 
+ """ + # Create test headers for workflow inputs + input1_headers = { + "Authorization": "Bearer workflow-test-token-should-be-encrypted", + "X-API-Key": "workflow-test-api-key-123456", + "Accept": "application/json", + "Content-Type": "application/json", + } + input2_headers = { + "Authorization": "Bearer workflow-test-token-should-be-encrypted", + "X-API-Key": "workflow-test-api-key-123456", + "Accept-Language": "en-US", + "X-Custom-Header": "custom-value", + } + + # Create workflow request state with multiple inputs + workflow_request_state = {} + workflow_request_state.update(self._create_workflow_input_with_headers(input1_headers, "WorkflowInput1")) + workflow_request_state.update(self._create_workflow_input_with_headers(input2_headers, "WorkflowInput2")) + + # Create workflow and landing request + workflow_id = self._create_and_make_public_workflow("test_landing_encrypted_headers") + payload = CreateWorkflowLandingRequestPayload( + workflow_id=workflow_id, + workflow_target_type="stored_workflow", + request_state=workflow_request_state, + public=True, + ) + + # Create and retrieve workflow landing request + workflow_landing = self.dataset_populator.create_workflow_landing(payload) + assert workflow_landing.workflow_target_type == "stored_workflow" + + retrieved_workflow_landing = self.dataset_populator.use_workflow_landing(workflow_landing.uuid) + request_state = retrieved_workflow_landing.request_state + + # Verify headers are preserved in both workflow inputs + assert "WorkflowInput1" in request_state and "WorkflowInput2" in request_state + self._assert_headers_match(request_state["WorkflowInput1"]["headers"], input1_headers) + self._assert_headers_match(request_state["WorkflowInput2"]["headers"], input2_headers) + + # Verify that sensitive headers are stored encrypted in the database + sensitive_values = ["Bearer workflow-test-token-should-be-encrypted", "workflow-test-api-key-123456"] + self._verify_headers_encrypted_in_db(str(workflow_landing.uuid), 
sensitive_values, WorkflowLandingRequestModel) + + +class TestLandingRequestsWithoutVaultIntegration(BaseLandingRequestTest): + """Test landing requests when headers are configured but vault is not configured. + + This class tests the behavior when headers configuration exists but no vault is configured. + When sensitive headers are present, the system should fail because it cannot encrypt them. + """ + + @classmethod + def handle_galaxy_config_kwds(cls, config): + super().handle_galaxy_config_kwds(config) + # Configure headers but NOT vault - this tests the vault requirement for sensitive headers + cls._configure_allowed_url_headers(ALLOW_URL_HEADERS_CONF, config) + + def test_data_landing_fails_without_vault_when_sensitive_headers_present(self): + """Test that data landing requests fail when vault is not configured but sensitive headers are present. + + This test verifies that when sensitive headers (like Authorization, API keys, etc.) are present + in a landing request but no vault is configured, the system fails with a 500 error rather than + storing the sensitive information in plain text in the database. + """ + # Create headers with sensitive values + headers = { + "Authorization": "Bearer no-vault-test-token-should-fail", + "X-API-Key": "no-vault-test-api-key-should-fail", + "Accept": "application/json", + } + + # Create data landing request with sensitive headers + request_state = self._create_data_landing_request_state(headers) + payload = CreateDataLandingPayload(request_state=request_state, public=True) + + # Should return 500 status code when trying to create the landing request + # because sensitive headers are present but vault is not configured + response = self.dataset_populator.create_landing_raw(payload, "data") + assert response.status_code == 500 + + def test_data_landing_succeeds_without_vault_when_no_sensitive_headers(self): + """Test that data landing requests succeed when vault is not configured but no sensitive headers are present. 
+ + This test verifies that when only non-sensitive headers are present in a landing request + and no vault is configured, the system works normally since encryption is not required. + """ + # Create only non-sensitive headers + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + "X-Custom-Header": "custom-value", + } + + # Create data landing request with only non-sensitive headers + request_state = self._create_data_landing_request_state(headers) + payload = CreateDataLandingPayload(request_state=request_state, public=True) + + # Should succeed because no sensitive headers are present + response = self.dataset_populator.create_data_landing(payload) + assert response.tool_id == "__DATA_FETCH__" + + # Verify we can retrieve the landing request and headers are preserved + tool_landing = self.dataset_populator.use_tool_landing(response.uuid) + actual_headers = self._extract_data_landing_headers(tool_landing) + self._assert_headers_match(actual_headers, headers) + + def test_workflow_landing_fails_without_vault_when_sensitive_headers_present(self): + """Test that workflow landing requests fail when vault is not configured but sensitive headers are present.""" + # Create workflow input with sensitive headers + headers = { + "Authorization": "Bearer workflow-no-vault-token-should-fail", + "X-API-Key": "workflow-no-vault-api-key-should-fail", + "Accept": "application/json", + } + workflow_request_state = self._create_workflow_input_with_headers(headers) + + # Create workflow and landing request + workflow_id = self._create_and_make_public_workflow("test_landing_no_vault") + payload = CreateWorkflowLandingRequestPayload( + workflow_id=workflow_id, + workflow_target_type="stored_workflow", + request_state=workflow_request_state, + public=True, + ) + + # Should return 500 status code when trying to create the workflow landing request + # because sensitive headers are present but vault is not configured + create_url = "workflow_landings" + 
json = payload.model_dump(mode="json") + response = self.dataset_populator._post(create_url, json, json=True, anon=True) + assert response.status_code == 500 + + +class TestLandingRequestsWithoutHeadersConfigIntegration(BaseLandingRequestTest): + """Test landing requests when no headers configuration exists. + + This class tests the behavior when no URL headers configuration file is present. + The system should fail fast with any headers (sensitive or not) because headers + require explicit configuration to be allowed. + """ + + def test_data_landing_fails_without_config(self): + """Test that data landing requests fail when no URL headers configuration exists. + + This test verifies the fail-fast behavior: when no URL headers configuration file + exists, ANY attempt to use headers (sensitive or not) will fail immediately with + a clear error message, rather than silently allowing or denying headers. + """ + # Create only non-sensitive headers + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + "X-Custom-Header": "custom-value", + } + + # Create data landing request with headers + request_state = self._create_data_landing_request_state(headers) + payload = CreateDataLandingPayload(request_state=request_state, public=True) + + # Should fail with 403 because no URL headers configuration is available (fail-fast) + response = self.dataset_populator.create_landing_raw(payload, "data") + assert response.status_code == 403 + assert "No URL headers configuration is available" in response.json()["err_msg"] + + def test_workflow_landing_fails_without_config(self): + """Test that workflow landing requests fail when no URL headers configuration exists. + + This test verifies the fail-fast behavior for workflow landings: when no URL headers + configuration file exists, any attempt to use headers will fail immediately. 
+ """ + # Create workflow input with headers + headers = { + "Authorization": "Bearer workflow-no-vault-token-should-fail", + "X-API-Key": "workflow-no-vault-api-key-should-fail", + "Accept": "application/json", + } + workflow_request_state = self._create_workflow_input_with_headers(headers) + + # Create workflow and landing request + workflow_id = self._create_and_make_public_workflow("test_landing_no_config") + payload = CreateWorkflowLandingRequestPayload( + workflow_id=workflow_id, + workflow_target_type="stored_workflow", + request_state=workflow_request_state, + public=True, + ) + + # Should return 403 status code when trying to create the workflow landing request + # because no URL headers configuration is available + create_url = "workflow_landings" + json = payload.model_dump(mode="json") + response = self.dataset_populator._post(create_url, json, json=True, anon=True) + assert response.status_code == 403 + assert "No URL headers configuration is available" in response.json()["err_msg"] diff --git a/test/unit/app/managers/test_headers_encryption.py b/test/unit/app/managers/test_headers_encryption.py new file mode 100644 index 000000000000..5bf5cbeb45c6 --- /dev/null +++ b/test/unit/app/managers/test_headers_encryption.py @@ -0,0 +1,403 @@ +from typing import Optional + +import pytest + +from galaxy.config.url_headers import UrlHeadersConfigFactory +from galaxy.exceptions import ( + ConfigDoesNotAllowException, + RequestParameterMissingException, +) +from galaxy.managers.headers_encryption import ( + create_vault_key, + create_vault_reference, + decrypt_headers_in_data, + encrypt_headers_in_data, + has_sensitive_headers, +) +from galaxy.security.vault import Vault + + +class MockVault(Vault): + """Mock vault for testing encryption/decryption.""" + + def __init__(self): + self.storage = {} + + def write_secret(self, key: str, value: str) -> None: + self.storage[key] = value + + def read_secret(self, key: str) -> Optional[str]: + return self.storage.get(key) 
+ + def list_secrets(self, key: str) -> list[str]: + """Mock implementation - not used in header tests.""" + return [] + + +def create_test_url_headers_config(): + config_dict = { + "patterns": [ + { + "url_pattern": r"^https://api\.github\.com/.*", + "headers": [ + {"name": "Authorization", "sensitive": True}, + {"name": "Accept", "sensitive": False}, + {"name": "Accept-Encoding", "sensitive": False}, + ], + }, + { + "url_pattern": r"^https://api\.example\.com/.*", + "headers": [ + {"name": "Authorization", "sensitive": True}, + {"name": "X-API-Key", "sensitive": True}, + {"name": "Content-Type", "sensitive": False}, + {"name": "Accept", "sensitive": False}, + ], + }, + { + "url_pattern": r"^https://.*", + "headers": [ + {"name": "Accept", "sensitive": False}, + {"name": "Accept-Language", "sensitive": False}, + {"name": "Content-Type", "sensitive": False}, + ], + }, + ] + } + + return UrlHeadersConfigFactory.from_dict(config_dict) + + +class TestSensitiveHeaderDetection: + """Test sensitive header pattern matching using real URL headers configuration.""" + + def test_sensitive_headers_detected(self): + """Test that configured sensitive headers are detected for matching URLs.""" + config = create_test_url_headers_config() + + # Test GitHub API URL - Authorization should be sensitive + assert config.is_header_sensitive_for_url("Authorization", "https://api.github.com/repos") + assert not config.is_header_sensitive_for_url("Accept", "https://api.github.com/repos") + assert not config.is_header_sensitive_for_url("Accept-Encoding", "https://api.github.com/repos") + + # Test API example URL - Authorization and X-API-Key should be sensitive + assert config.is_header_sensitive_for_url("Authorization", "https://api.example.com/data") + assert config.is_header_sensitive_for_url("X-API-Key", "https://api.example.com/data") + assert not config.is_header_sensitive_for_url("Content-Type", "https://api.example.com/data") + + def test_non_sensitive_headers_not_detected(self): + 
"""Test that non-sensitive headers are not detected.""" + config = create_test_url_headers_config() + + # Test generic HTTPS URL - no sensitive headers configured + assert not config.is_header_sensitive_for_url("Accept", "https://example.com/data") + assert not config.is_header_sensitive_for_url("Accept-Language", "https://example.com/data") + assert not config.is_header_sensitive_for_url("Content-Type", "https://example.com/data") + + def test_headers_not_allowed_for_url(self): + """Test that headers not in patterns are not considered sensitive.""" + config = create_test_url_headers_config() + + # Header not in any pattern should not be sensitive + assert not config.is_header_sensitive_for_url("X-Custom-Header", "https://api.github.com/repos") + assert not config.is_header_sensitive_for_url("X-Custom-Header", "https://example.com/data") + + +class TestHasSensitiveHeaders: + """Test has_sensitive_headers function that recursively checks for sensitive headers.""" + + def test_detects_sensitive_headers(self): + """Test detection of sensitive headers in various structures.""" + config = create_test_url_headers_config() + + # Test with config and URL - should work normally + data_with_url = {"url": "https://api.github.com/repos", "headers": {"Authorization": "Bearer token"}} + assert has_sensitive_headers(data_with_url, url_headers_config=config) + + # Test nested structure with URL + nested_data = { + "request_json": { + "targets": [{"elements": [{"url": "https://api.example.com/data", "headers": {"X-API-Key": "secret"}}]}] + } + } + assert has_sensitive_headers(nested_data, url_headers_config=config) + + def test_ignores_non_sensitive_headers(self): + """Test that non-sensitive headers are ignored when URL is provided.""" + config = create_test_url_headers_config() + # When URL is provided, pattern-based checking is used + data = {"url": "https://example.com/api", "headers": {"Content-Type": "application/json"}} + assert not has_sensitive_headers(data, 
url_headers_config=config) + + def test_handles_missing_or_invalid_headers(self): + """Test edge cases with missing or invalid headers.""" + config = create_test_url_headers_config() + assert not has_sensitive_headers({}, url_headers_config=config) # Empty data + assert not has_sensitive_headers({"no_headers": "value"}, url_headers_config=config) # No headers key + assert not has_sensitive_headers({"headers": {}}, url_headers_config=config) # Empty headers (ignored) + assert not has_sensitive_headers({"headers": "not a dict"}, url_headers_config=config) # Invalid headers type + + def test_headers_without_url_fail_fast(self): + """Test that headers without URL fail fast in pattern-based system.""" + config = create_test_url_headers_config() + # Without URL, headers should fail fast since we can't validate them + data = {"headers": {"Content-Type": "application/json"}} + with pytest.raises(RequestParameterMissingException, match="URL is required for header validation"): + has_sensitive_headers(data, url_headers_config=config) + + def test_fails_without_config(self): + """Test that function fails fast when no configuration is provided.""" + + # Should raise ConfigDoesNotAllowException when headers exist but no config + with pytest.raises( + ConfigDoesNotAllowException, match="Headers are not allowed without proper URL headers configuration" + ): + has_sensitive_headers({"headers": {"Authorization": "Bearer token"}}) + + # Should raise ConfigDoesNotAllowException for nested headers too + nested_data = {"request_json": {"targets": [{"elements": [{"headers": {"X-API-Key": "secret"}}]}]}} + with pytest.raises( + ConfigDoesNotAllowException, match="Headers are not allowed without proper URL headers configuration" + ): + has_sensitive_headers(nested_data) + + # Should NOT raise error when no headers present + assert not has_sensitive_headers({}) # Empty data + assert not has_sensitive_headers({"no_headers": "value"}) # No headers key + + +class TestVaultKeyAndReference: + 
"""Test vault key and reference creation.""" + + def test_create_vault_key_default_prefix(self): + """Test vault key creation with default prefix.""" + key = create_vault_key("uuid-123", "Authorization") + assert key == "headers/uuid-123/authorization" + + key = create_vault_key("uuid-456", "X-API-Key") + assert key == "headers/uuid-456/x_api_key" + + def test_create_vault_key_custom_prefix(self): + """Test vault key creation with custom prefix.""" + key = create_vault_key("uuid-123", "Authorization", "custom_prefix") + assert key == "custom_prefix/uuid-123/authorization" + + key = create_vault_key("uuid-456", "X-API-Key", "landing_request/headers") + assert key == "landing_request/headers/uuid-456/x_api_key" + + def test_create_vault_reference_default(self): + """Test vault reference creation with default prefix.""" + ref = create_vault_reference("Authorization") + assert ref == "__VAULT_HEADER_AUTHORIZATION__" + + ref = create_vault_reference("X-API-Key") + assert ref == "__VAULT_HEADER_X_API_KEY__" + + def test_create_vault_reference_custom_prefix(self): + """Test vault reference creation with custom prefix.""" + ref = create_vault_reference("Authorization", "CUSTOM_REF") + assert ref == "__CUSTOM_REF_AUTHORIZATION__" + + ref = create_vault_reference("X-API-Key", "SESSION_HEADER") + assert ref == "__SESSION_HEADER_X_API_KEY__" + + +class TestHeaderEncryptionDecryption: + """Test end-to-end header encryption and decryption.""" + + def test_encrypt_decrypt_simple_headers(self): + """Test encrypting and decrypting a simple headers structure.""" + vault = MockVault() + context_id = "test-uuid" + config = create_test_url_headers_config() + + # Simple case with headers at top level and URL for validation + data = { + "url": "https://api.example.com/data", + "headers": { + "Authorization": "Bearer secret-token", + "X-API-Key": "api-key-123", + "Accept": "application/json", + }, + } + + # Encrypt + encrypted = encrypt_headers_in_data(data, context_id, vault, 
url_headers_config=config) + + # Check that sensitive headers are encrypted + headers = encrypted["headers"] + assert headers["Authorization"] == "__VAULT_HEADER_AUTHORIZATION__" # Encrypted (sensitive) + assert headers["X-API-Key"] == "__VAULT_HEADER_X_API_KEY__" # Encrypted (sensitive) + assert headers["Accept"] == "application/json" + + # Check vault has the encrypted headers + assert len(vault.storage) == 2 + assert "headers/test-uuid/authorization" in vault.storage + assert "headers/test-uuid/x_api_key" in vault.storage + + # Decrypt + decrypted = decrypt_headers_in_data(encrypted, context_id, vault) + + # Check original values are restored + decrypted_headers = decrypted["headers"] + assert decrypted_headers["Authorization"] == "Bearer secret-token" + assert decrypted_headers["X-API-Key"] == "api-key-123" + assert decrypted_headers["Accept"] == "application/json" + + def test_encrypt_decrypt_nested_headers(self): + """Test encrypting and decrypting headers in a complex nested structure.""" + vault = MockVault() + context_id = "test-uuid" + config = create_test_url_headers_config() + + # Complex nested structure like in the actual data landing request + data: dict = { + "request_version": "1", + "request_json": { + "targets": [ + { + "destination": {"type": "hdas"}, + "elements": [ + { + "src": "url", + "url": "https://api.github.com/repos/test/repo", + "headers": { + "Authorization": "Bearer secret-token", + "Accept": "application/vnd.github.v3+json", + "Accept-Encoding": "gzip, deflate", + }, + } + ], + } + ] + }, + } + + # Encrypt + encrypted = encrypt_headers_in_data(data, context_id, vault, url_headers_config=config) + + # Check structure is preserved + assert encrypted["request_version"] == "1" + + # Check headers are encrypted based on GitHub API pattern + headers = encrypted["request_json"]["targets"][0]["elements"][0]["headers"] + assert headers["Authorization"] == "__VAULT_HEADER_AUTHORIZATION__" # Sensitive for GitHub API + assert headers["Accept"] 
== "application/vnd.github.v3+json" # Not sensitive + assert headers["Accept-Encoding"] == "gzip, deflate" # Not sensitive + + # Decrypt + decrypted = decrypt_headers_in_data(encrypted, context_id, vault) + + # Check original structure and values are restored + original_headers = data["request_json"]["targets"][0]["elements"][0]["headers"] + decrypted_headers = decrypted["request_json"]["targets"][0]["elements"][0]["headers"] + + assert decrypted_headers == original_headers + + def test_multiple_headers_sections(self): + """Test handling multiple headers sections in different parts of the structure.""" + vault = MockVault() + context_id = "test-uuid" + config = create_test_url_headers_config() + + # Multiple sections with different URLs + data = { + "section1": { + "url": "https://api.github.com/repos", + "headers": { + "Authorization": "Bearer token1", + "Accept": "application/vnd.github.v3+json", + }, + }, + "section2": { + "data": { + "url": "https://example.com/data", + "headers": { + "Accept": "application/json", + "Content-Type": "application/json", + }, + } + }, + } + + # Encrypt + encrypted = encrypt_headers_in_data(data, context_id, vault, url_headers_config=config) + + # Check section1 (GitHub API pattern - Authorization is sensitive) + section1_headers = encrypted["section1"]["headers"] + assert section1_headers["Authorization"] == "__VAULT_HEADER_AUTHORIZATION__" # Encrypted + assert section1_headers["Accept"] == "application/vnd.github.v3+json" # Not encrypted + + # Check section2 (generic HTTPS pattern - no sensitive headers) + section2_headers = encrypted["section2"]["data"]["headers"] + assert section2_headers["Accept"] == "application/json" # Not encrypted + assert section2_headers["Content-Type"] == "application/json" # Not encrypted + + # Decrypt + decrypted = decrypt_headers_in_data(encrypted, context_id, vault) + + # Check original values are preserved + assert decrypted["section1"]["headers"]["Authorization"] == "Bearer token1" + assert 
decrypted["section1"]["headers"]["Accept"] == "application/vnd.github.v3+json" + assert decrypted["section2"]["data"]["headers"]["Accept"] == "application/json" + assert decrypted["section2"]["data"]["headers"]["Content-Type"] == "application/json" + + def test_encrypt_fails_without_config(self): + """Test that encryption fails fast when no configuration is provided.""" + + vault = MockVault() + context_id = "test-uuid" + + data = { + "headers": { + "Authorization": "Bearer secret-token", + "X-API-Key": "api-key-123", + } + } + + # Should raise ConfigDoesNotAllowException when trying to encrypt without config + with pytest.raises( + ConfigDoesNotAllowException, match="Headers are not allowed without proper URL headers configuration" + ): + encrypt_headers_in_data(data, context_id, vault) + + def test_encrypt_headers_with_url_pattern_checking(self): + """Test encryption with URL-based pattern checking.""" + vault = MockVault() + context_id = "test-uuid" + config = create_test_url_headers_config() + + # Test: Headers WITHOUT URL should fail fast + data_no_url = { + "headers": { + "Authorization": "Bearer token", + "Content-Type": "application/json", + "Accept-Language": "en-US,en;q=0.9", + } + } + + with pytest.raises(RequestParameterMissingException, match="URL is required for header validation"): + encrypt_headers_in_data(data_no_url, context_id, vault, url_headers_config=config) + + # Test: Headers WITH URL - pattern-based checking works + vault2 = MockVault() # Fresh vault for second test + data_with_url = { + "url": "https://api.example.com/data", + "headers": { + "Authorization": "Bearer token", + "Content-Type": "application/json", + "Accept-Language": "en-US,en;q=0.9", + }, + } + + encrypted_with_url = encrypt_headers_in_data(data_with_url, context_id, vault2, url_headers_config=config) + headers_with_url = encrypted_with_url["headers"] + + # Only sensitive headers encrypted (URL-based pattern matching) + assert headers_with_url["Authorization"] == 
"__VAULT_HEADER_AUTHORIZATION__" # Sensitive for api.example.com + assert headers_with_url["Content-Type"] == "application/json" # Not sensitive + assert headers_with_url["Accept-Language"] == "en-US,en;q=0.9" # Not sensitive + + # Verify vault has the encrypted header + assert len(vault2.storage) == 1 + assert "headers/test-uuid/authorization" in vault2.storage diff --git a/test/unit/app/managers/test_headers_url_pattern_matching.py b/test/unit/app/managers/test_headers_url_pattern_matching.py new file mode 100644 index 000000000000..30c7aa9aeb52 --- /dev/null +++ b/test/unit/app/managers/test_headers_url_pattern_matching.py @@ -0,0 +1,155 @@ +from galaxy.config.url_headers import UrlHeadersConfigFactory + + +def create_overlapping_patterns_config(): + """Create config with multiple overlapping patterns to test all-matches behavior.""" + config_dict = { + "patterns": [ + { + "url_pattern": r"^https://api\.github\.com/.*", + "headers": [ + {"name": "Accept", "sensitive": False}, + {"name": "Content-Type", "sensitive": False}, + ], + }, + { + "url_pattern": r"^https://api\.github\.com/repos/.*", + "headers": [ + {"name": "Authorization", "sensitive": True}, + ], + }, + { + "url_pattern": r"^https://.*", + "headers": [ + {"name": "Accept-Encoding", "sensitive": False}, + ], + }, + ] + } + + return UrlHeadersConfigFactory.from_dict(config_dict) + + +class TestAllMatchesPatternBehavior: + """Test that all matching patterns are considered (union of permissions).""" + + def test_find_all_matching_returns_all_patterns(self): + """Test that find_all_matching returns all patterns that match a URL.""" + config = create_overlapping_patterns_config() + + # URL matches all three patterns + url = "https://api.github.com/repos/owner/repo" + matching = config.find_all_matching(url) + + assert len(matching) == 3 + + def test_header_allowed_checks_all_matching_patterns(self): + """Test that a header is allowed if ANY matching pattern allows it.""" + config = 
create_overlapping_patterns_config() + + url = "https://api.github.com/repos/owner/repo" + + # Each header should be allowed if it's in ANY matching pattern + assert config.is_header_allowed_for_url("Accept", url) # From github_basic + assert config.is_header_allowed_for_url("Content-Type", url) # From github_basic + assert config.is_header_allowed_for_url("Authorization", url) # From github_auth + assert config.is_header_allowed_for_url("Accept-Encoding", url) # From https_generic + + # Headers not in any pattern + assert not config.is_header_allowed_for_url("X-Custom-Header", url) + assert not config.is_header_allowed_for_url("Cookie", url) + + def test_header_allowed_subset_of_patterns(self): + """Test headers allowed when only subset of patterns match.""" + config = create_overlapping_patterns_config() + + # URL only matches github_basic and https_generic (not github_auth) + url = "https://api.github.com/users/octocat" + + assert config.is_header_allowed_for_url("Accept", url) # From github_basic + assert config.is_header_allowed_for_url("Accept-Encoding", url) # From https_generic + # Authorization not allowed because github_auth pattern doesn't match + assert not config.is_header_allowed_for_url("Authorization", url) + + def test_header_sensitive_secure_by_default(self): + """Test that if ANY pattern marks header sensitive, it's treated as sensitive.""" + config_dict = { + "patterns": [ + { + "url_pattern": r"^https://test1\.example\.com/.*", + "headers": [{"name": "Authorization", "sensitive": False}], + }, + { + "url_pattern": r"^https://.*\.example\.com/.*", + "headers": [{"name": "Authorization", "sensitive": True}], + }, + ] + } + config = UrlHeadersConfigFactory.from_dict(config_dict) + + # URL matches both patterns + url = "https://test1.example.com/api" + + # Should be treated as sensitive (secure by default) + assert config.is_header_sensitive_for_url("Authorization", url) + + def test_multiple_overlapping_patterns_union(self): + """Test that union 
of headers from all matching patterns is allowed.""" + config_dict = { + "patterns": [ + { + "url_pattern": r"^https://example\.com/.*", + "headers": [ + {"name": "Header-A", "sensitive": False}, + {"name": "Header-B", "sensitive": True}, + ], + }, + { + "url_pattern": r"^https://example\.com/api/.*", + "headers": [ + {"name": "Header-C", "sensitive": False}, + {"name": "Header-D", "sensitive": True}, + ], + }, + { + "url_pattern": r"^https://example\.com/api/v1/.*", + "headers": [ + {"name": "Header-E", "sensitive": False}, + ], + }, + ] + } + config = UrlHeadersConfigFactory.from_dict(config_dict) + + # URL matches all three patterns + url = "https://example.com/api/v1/resource" + + # All headers from all patterns should be allowed + assert config.is_header_allowed_for_url("Header-A", url) + assert config.is_header_allowed_for_url("Header-B", url) + assert config.is_header_allowed_for_url("Header-C", url) + assert config.is_header_allowed_for_url("Header-D", url) + assert config.is_header_allowed_for_url("Header-E", url) + + # Headers B and D should be sensitive + assert config.is_header_sensitive_for_url("Header-B", url) + assert config.is_header_sensitive_for_url("Header-D", url) + + # Headers A, C, E should not be sensitive + assert not config.is_header_sensitive_for_url("Header-A", url) + assert not config.is_header_sensitive_for_url("Header-C", url) + assert not config.is_header_sensitive_for_url("Header-E", url) + + def test_no_matching_patterns(self): + """Test behavior when no patterns match.""" + config = create_overlapping_patterns_config() + + # URL doesn't match any pattern + url = "http://example.com/api" # http, not https + + matching = config.find_all_matching(url) + assert len(matching) == 0 + + # No headers should be allowed + assert not config.is_header_allowed_for_url("Accept", url) + assert not config.is_header_allowed_for_url("Authorization", url) diff --git a/test/unit/app/managers/test_landing.py b/test/unit/app/managers/test_landing.py 
index ac7b9664256b..33a30eb424c5 100644 --- a/test/unit/app/managers/test_landing.py +++ b/test/unit/app/managers/test_landing.py @@ -76,6 +76,7 @@ def setUp(self): self.app.security, self.workflow_contents_manager, cast(MinimalManagerApp, MockApp()), + self.app.config, ) self.trans.app.trs_proxy = TrsProxy(GalaxyAppConfiguration(override_tempdir=False))