|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +r""" |
| 3 | +================================================ |
| 4 | +B615: Test for unsafe Hugging Face Hub downloads |
| 5 | +================================================ |
| 6 | +
|
| 7 | +This plugin checks for unsafe downloads from Hugging Face Hub without proper |
| 8 | +integrity verification. Downloading models, datasets, or files without |
| 9 | +specifying a revision based on an immutable revision (commit) can |
| 10 | +lead to supply chain attacks where malicious actors could |
| 11 | +replace model files and use an existing tag or branch name |
| 12 | +to serve malicious content. |
| 13 | +
|
| 14 | +The secure approach is to: |
| 15 | +
|
| 16 | +1. Pin to specific revisions/commits when downloading models, files or datasets |
| 17 | +
|
| 18 | +Common unsafe patterns: |
| 19 | +- ``AutoModel.from_pretrained("org/model-name")`` |
| 20 | +- ``AutoModel.from_pretrained("org/model-name", revision="main")`` |
| 21 | +- ``AutoModel.from_pretrained("org/model-name", revision="v1.0.0")`` |
| 22 | +- ``load_dataset("org/dataset-name")`` without revision |
| 23 | +- ``load_dataset("org/dataset-name", revision="main")`` |
| 24 | +- ``load_dataset("org/dataset-name", revision="v1.0")`` |
| 25 | +- ``AutoTokenizer.from_pretrained("org/model-name")`` |
| 26 | +- ``AutoTokenizer.from_pretrained("org/model-name", revision="main")`` |
| 27 | +- ``AutoTokenizer.from_pretrained("org/model-name", revision="v3.3.0")`` |
| 28 | +- ``hf_hub_download(repo_id="org/model_name", filename="file_name")`` |
| 29 | +- ``hf_hub_download(repo_id="org/model_name", |
| 30 | + filename="file_name", |
| 31 | + revision="main" |
| 32 | + )`` |
| 33 | +- ``hf_hub_download(repo_id="org/model_name", |
| 34 | + filename="file_name", |
| 35 | + revision="v2.0.0" |
| 36 | + )`` |
| 37 | +- ``snapshot_download(repo_id="org/model_name")`` |
| 38 | +- ``snapshot_download(repo_id="org/model_name", revision="main")`` |
| 39 | +- ``snapshot_download(repo_id="org/model_name", revision="refs/pr/1")`` |
| 40 | +
|
| 41 | +
|
| 42 | +:Example: |
| 43 | +
|
| 44 | +.. code-block:: none |
| 45 | +
|
| 46 | + >> Issue: Unsafe Hugging Face Hub download without revision pinning |
| 47 | + Severity: Medium Confidence: High |
| 48 | + CWE: CWE-494 (https://cwe.mitre.org/data/definitions/494.html) |
| 49 | + Location: examples/huggingface_unsafe_download.py:8 |
| 50 | + 7 # Unsafe: no revision specified |
| 51 | + 8 model = AutoModel.from_pretrained("org/model_name") |
| 52 | + 9 |
| 53 | +
|
| 54 | +.. seealso:: |
| 55 | +
|
| 56 | + - https://cwe.mitre.org/data/definitions/494.html |
| 57 | + - https://huggingface.co/docs/huggingface_hub/en/guides/download |
| 58 | +
|
| 59 | +.. versionadded:: 1.8.6 |
| 60 | +
|
| 61 | +""" |
| 62 | +import string |
| 63 | + |
| 64 | +import bandit |
| 65 | +from bandit.core import issue |
| 66 | +from bandit.core import test_properties as test |
| 67 | + |
| 68 | + |
@test.checks("Call")
@test.test_id("B615")
def huggingface_unsafe_download(context):
    """Flag Hugging Face Hub downloads that are not pinned to a commit hash.

    The check is skipped unless ``transformers``, ``datasets`` or
    ``huggingface_hub`` is imported, and it only considers calls whose
    qualified name ends in one of the watched function names *and* contains
    the module that function belongs to.

    A call is treated as safe when its ``revision`` (or ``commit_id``)
    argument is a string of at least 7 hexadecimal characters, or when its
    first positional argument is a string starting with ``./``, ``/`` or
    ``../`` (a local path).

    :param context: the Bandit context object for the call being inspected
    :return: a ``bandit.Issue`` for an unpinned download, otherwise ``None``
    """
    # Only run when at least one Hugging Face module is imported.
    hf_modules = ("transformers", "datasets", "huggingface_hub")
    if not any(
        context.is_module_imported_like(module) for module in hf_modules
    ):
        return

    qualname = context.call_function_name_qual
    if not isinstance(qualname, str):
        return

    # Watched function name -> modules that must appear in the qualified name.
    unsafe_patterns = {
        # transformers library patterns
        "from_pretrained": ["transformers"],
        # datasets library patterns
        "load_dataset": ["datasets"],
        # huggingface_hub patterns
        "hf_hub_download": ["huggingface_hub"],
        "snapshot_download": ["huggingface_hub"],
        "repository_id": ["huggingface_hub"],
    }

    qualname_parts = qualname.split(".")
    func_name = qualname_parts[-1]

    required_modules = unsafe_patterns.get(func_name)
    if required_modules is None:
        return
    if not any(module in qualname_parts for module in required_modules):
        return

    # The revision parameter is the key security control; commit_id is
    # accepted as an alternative spelling of the same pin.
    revision_value = context.get_call_arg_value("revision")
    commit_id_value = context.get_call_arg_value("commit_id")
    revision_to_check = revision_value or commit_id_value

    if isinstance(revision_to_check, str):
        # Remove quotes if present
        revision_str = revision_to_check.strip("\"'")

        # A pin is accepted when it looks like a commit hash: all
        # hexadecimal and at least 7 characters (short SHA) long.
        is_hex = all(c in string.hexdigits for c in revision_str)
        if len(revision_str) >= 7 and is_hex:
            return

    # Edge case: a first positional argument that is a local path is not
    # a Hub download, so it is not reported.
    first_arg = context.get_call_arg_at_position(0)
    if isinstance(first_arg, str) and first_arg.startswith(
        ("./", "/", "../")
    ):
        return

    # Report on the line of the pinning argument that was inspected.
    # get_lineno_for_call_arg() looks up a *keyword argument* by name, so
    # passing the function name (as before) never matched anything.
    # NOTE(review): when the keyword is absent the helper is expected to
    # return None, letting Bandit fall back to the call's own line - confirm.
    pin_arg = "revision" if revision_value else "commit_id"

    return bandit.Issue(
        severity=bandit.MEDIUM,
        confidence=bandit.HIGH,
        text=(
            "Unsafe Hugging Face Hub download without revision pinning "
            f"in {func_name}()"
        ),
        cwe=issue.Cwe.DOWNLOAD_OF_CODE_WITHOUT_INTEGRITY_CHECK,
        lineno=context.get_lineno_for_call_arg(pin_arg),
    )
0 commit comments