astral-sh · ntBre · May 28, 2025 · Apr 16, 2025 · Apr 19, 2025 · Apr 19, 2025
diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/missing_maxsplit_arg.py b/crates/ruff_linter/resources/test/fixtures/pylint/missing_maxsplit_arg.py
@@ -0,0 +1,184 @@
+SEQ = "1,2,3"  # module-level string used by the "string variable" cases
+
+class Foo(str):  # `str` subclass whose `split` is user-defined
+    class_str = "1,2,3"
+
+    def split(self, sep=None, maxsplit=-1) -> list[str]:
+        return super().split(sep, maxsplit)
+
+class Bar():  # the attribute is named `split` but holds a string, not a method
+    split = "1,2,3"
+
+# Errors: each expression takes element 0 or -1 of a split()/rsplit() call without maxsplit
+## Test split called directly on string literal
+"1,2,3".split(",")[0]  # [missing-maxsplit-arg]
+"1,2,3".split(",")[-1]  # [missing-maxsplit-arg]
+"1,2,3".rsplit(",")[0]  # [missing-maxsplit-arg]
+"1,2,3".rsplit(",")[-1]  # [missing-maxsplit-arg]
+
+## Test split called on string variable
+SEQ.split(",")[0]  # [missing-maxsplit-arg]
+SEQ.split(",")[-1]  # [missing-maxsplit-arg]
+SEQ.rsplit(",")[0]  # [missing-maxsplit-arg]
+SEQ.rsplit(",")[-1]  # [missing-maxsplit-arg]
+
+## Test split called on class attribute
+Foo.class_str.split(",")[0]  # [missing-maxsplit-arg]
+Foo.class_str.split(",")[-1]  # [missing-maxsplit-arg]
+Foo.class_str.rsplit(",")[0]  # [missing-maxsplit-arg]
+Foo.class_str.rsplit(",")[-1]  # [missing-maxsplit-arg]
+
+## Test split called on sliced string
+"1,2,3"[::-1].split(",")[0]  # [missing-maxsplit-arg]
+"1,2,3"[::-1][::-1].split(",")[0]  # [missing-maxsplit-arg]
+SEQ[:3].split(",")[0]  # [missing-maxsplit-arg]
+Foo.class_str[1:3].split(",")[-1]  # [missing-maxsplit-arg]
+"1,2,3"[::-1].rsplit(",")[0]  # [missing-maxsplit-arg]
+SEQ[:3].rsplit(",")[0]  # [missing-maxsplit-arg]
+Foo.class_str[1:3].rsplit(",")[-1]  # [missing-maxsplit-arg]
+
+## Test sep given as named argument
+"1,2,3".split(sep=",")[0]  # [missing-maxsplit-arg]
+"1,2,3".split(sep=",")[-1]  # [missing-maxsplit-arg]
+"1,2,3".rsplit(sep=",")[0]  # [missing-maxsplit-arg]
+"1,2,3".rsplit(sep=",")[-1]  # [missing-maxsplit-arg]
+
+## Special cases: separator is an escape sequence or spells the method name
+"1,2,3".split("\n")[0]  # [missing-maxsplit-arg]
+"1,2,3".split("split")[-1]  # [missing-maxsplit-arg]
+"1,2,3".rsplit("rsplit")[0]  # [missing-maxsplit-arg]
+
+## Test class attribute named split (a string attribute on which split/rsplit is called)
+Bar.split.split(",")[0]  # [missing-maxsplit-arg]
+Bar.split.split(",")[-1]  # [missing-maxsplit-arg]
+Bar.split.rsplit(",")[0]  # [missing-maxsplit-arg]
+Bar.split.rsplit(",")[-1]  # [missing-maxsplit-arg]
+
+## Test unpacked dict literal kwargs (no "maxsplit" key in the dict)
+"1,2,3".split(**{"sep": ","})[0]  # [missing-maxsplit-arg]
+
+
+# OK: none of the expressions in this section should be reported
+## Test not accessing the first or last element
+### Test split called directly on string literal
+"1,2,3".split(",")[1]
+"1,2,3".split(",")[-2]
+"1,2,3".rsplit(",")[1]
+"1,2,3".rsplit(",")[-2]
+
+### Test split called on string variable
+SEQ.split(",")[1]
+SEQ.split(",")[-2]
+SEQ.rsplit(",")[1]
+SEQ.rsplit(",")[-2]
+
+### Test split called on class attribute
+Foo.class_str.split(",")[1]
+Foo.class_str.split(",")[-2]
+Foo.class_str.rsplit(",")[1]
+Foo.class_str.rsplit(",")[-2]
+
+### Test split called on sliced string
+"1,2,3"[::-1].split(",")[1]
+SEQ[:3].split(",")[1]
+Foo.class_str[1:3].split(",")[-2]
+"1,2,3"[::-1].rsplit(",")[1]
+SEQ[:3].rsplit(",")[1]
+Foo.class_str[1:3].rsplit(",")[-2]
+
+### Test sep given as named argument
+"1,2,3".split(sep=",")[1]
+"1,2,3".split(sep=",")[-2]
+"1,2,3".rsplit(sep=",")[1]
+"1,2,3".rsplit(sep=",")[-2]
+
+## Test varying maxsplit argument
+### str.split() tests
+"1,2,3".split(sep=",", maxsplit=1)[-1]
+"1,2,3".split(sep=",", maxsplit=1)[0]
+"1,2,3".split(sep=",", maxsplit=2)[-1]
+"1,2,3".split(sep=",", maxsplit=2)[0]
+"1,2,3".split(sep=",", maxsplit=2)[1]
+
+### str.rsplit() tests
+"1,2,3".rsplit(sep=",", maxsplit=1)[-1]
+"1,2,3".rsplit(sep=",", maxsplit=1)[0]
+"1,2,3".rsplit(sep=",", maxsplit=2)[-1]
+"1,2,3".rsplit(sep=",", maxsplit=2)[0]
+"1,2,3".rsplit(sep=",", maxsplit=2)[1]
+
+## Test user-defined split
+Foo("1,2,3").split(",")[0]
+Foo("1,2,3").split(",")[-1]
+Foo("1,2,3").rsplit(",")[0]
+Foo("1,2,3").rsplit(",")[-1]
+
+## Test split called on sliced list (the receiver is a list, not a string)
+["1", "2", "3"][::-1].split(",")[0]
+
+## Test class attribute named split (subscripted directly, no method call)
+Bar.split[0]
+Bar.split[-1]
+Bar.split[0]
+Bar.split[-1]
+
+## Test unpacked dict literal kwargs (the dict contains a "maxsplit" key)
+"1,2,3".split(",", **{"maxsplit": 1})[0]
+"1,2,3".split(**{"sep": ",", "maxsplit": 1})[0]
+
+
+# TODO: cases that need analysis the rule does not perform yet
+
+## Test variable split result index
+## TODO: These require the ability to resolve a variable name to a value
+# Errors
+result_index = 0
+"1,2,3".split(",")[result_index]  # TODO: [missing-maxsplit-arg]
+result_index = -1
+"1,2,3".split(",")[result_index]  # TODO: [missing-maxsplit-arg]
+# OK
+result_index = 1
+"1,2,3".split(",")[result_index]
+result_index = -2
+"1,2,3".split(",")[result_index]
+
+
+## Test split result index modified in loop
+## TODO: These require the ability to recognize being in a loop where:
+##     - the result of split called on a string is indexed by a variable
+##     - the variable index above is modified
+# OK (the index is incremented on every iteration, so it is not always 0)
+result_index = 0
+for j in range(3):
+    print(SEQ.split(",")[result_index])
+    result_index = result_index + 1
+
+
+## Test accessor (the string is obtained from a method call)
+## TODO: These require the ability to get the return type of a method
+## (possibly via `typing::is_string`)
+class Baz():  # hands out its string only through `get_string()`
+    def __init__(self):
+        self.my_str = "1,2,3"
+
+    def get_string(self) -> str:
+        return self.my_str
+
+# Errors
+Baz().get_string().split(",")[0]  # TODO: [missing-maxsplit-arg]
+Baz().get_string().split(",")[-1]  # TODO: [missing-maxsplit-arg]
+# OK
+Baz().get_string().split(",")[1]
+Baz().get_string().split(",")[-2]
+
+
+## Test unpacked dict instance kwargs
+## TODO: These require the ability to resolve a dict variable name to a value
+# Errors
+kwargs_without_maxsplit = {"seq": ","}
+"1,2,3".split(**kwargs_without_maxsplit)[0]  # TODO: [missing-maxsplit-arg]
+# OK
+kwargs_with_maxsplit = {"maxsplit": 1}
+"1,2,3".split(",", **kwargs_with_maxsplit)[0]  # TODO: false positive
+kwargs_with_maxsplit = {"sep": ",", "maxsplit": 1}
+"1,2,3".split(**kwargs_with_maxsplit)[0]  # TODO: false positive
diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs
@@ -176,6 +176,9 @@ pub(crate) fn expression(expr: &Expr, checker: &Checker) {
             if checker.enabled(Rule::Airflow3Removal) {
                 airflow::rules::airflow_3_removal_expr(checker, expr);
             }
+            if checker.enabled(Rule::MissingMaxsplitArg) {
+                pylint::rules::missing_maxsplit_arg(checker, value, slice, expr);
+            }
             pandas_vet::rules::subscript(checker, value, expr);
         }
         Expr::Tuple(ast::ExprTuple {

diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs
@@ -198,6 +198,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
         (Pylint, "C0132") => (RuleGroup::Stable, rules::pylint::rules::TypeParamNameMismatch),
         (Pylint, "C0205") => (RuleGroup::Stable, rules::pylint::rules::SingleStringSlots),
         (Pylint, "C0206") => (RuleGroup::Stable, rules::pylint::rules::DictIndexMissingItems),
+        (Pylint, "C0207") => (RuleGroup::Preview, rules::pylint::rules::MissingMaxsplitArg),
         (Pylint, "C0208") => (RuleGroup::Stable, rules::pylint::rules::IterationOverSet),
         (Pylint, "C0414") => (RuleGroup::Stable, rules::pylint::rules::UselessImportAlias),
         (Pylint, "C0415") => (RuleGroup::Preview, rules::pylint::rules::ImportOutsideTopLevel),

diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs
@@ -231,6 +231,7 @@ mod tests {
         Path::new("bad_staticmethod_argument.py")
     )]
     #[test_case(Rule::LenTest, Path::new("len_as_condition.py"))]
+    #[test_case(Rule::MissingMaxsplitArg, Path::new("missing_maxsplit_arg.py"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(

diff --git a/crates/ruff_linter/src/rules/pylint/rules/missing_maxsplit_arg.rs b/crates/ruff_linter/src/rules/pylint/rules/missing_maxsplit_arg.rs
@@ -0,0 +1,134 @@
+use ruff_diagnostics::{Diagnostic, Violation};
+use ruff_macros::{derive_message_formats, ViolationMetadata};
+use ruff_python_ast::{
+    DictItem, Expr, ExprAttribute, ExprCall, ExprDict, ExprNumberLiteral, ExprStringLiteral,
+    ExprSubscript, ExprUnaryOp, Keyword, Number, UnaryOp,
+};
+use ruff_python_semantic::{analyze::typing, SemanticModel};
+use ruff_text_size::Ranged;
+
+use crate::checkers::ast::Checker;
+
+/// ## What it does
+/// Checks for access to the first or last element of the result of
+/// `str.split()` or `str.rsplit()` when the call does not pass a `maxsplit`
+/// argument.
+///
+/// ## Why is this bad?
+/// Calling `str.split()` without `maxsplit` set splits on every delimiter in the
+/// string. When accessing only the first or last element of the result, it
+/// would be more efficient to only split once.
+///
+/// ## Example
+/// ```python
+/// url = "www.example.com"
+/// prefix = url.split(".")[0]
+/// ```
+///
+/// Use instead:
+/// ```python
+/// url = "www.example.com"
+/// prefix = url.split(".", maxsplit=1)[0]
+/// ```
+#[derive(ViolationMetadata)]
+pub(crate) struct MissingMaxsplitArg;
+
+impl Violation for MissingMaxsplitArg {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        "Accessing only the first or last element of `str.split()` without setting `maxsplit=1`"
+            .to_string()
+    }
+}
+
+/// Whether `expr` is a string literal or resolves to a binding accepted by `typing::is_string`.
+fn is_string(expr: &Expr, semantic: &SemanticModel) -> bool {
+    let resolved = match expr {
+        Expr::Name(name) => semantic.only_binding(name),
+        _ => match semantic.lookup_attribute(expr) {
+            Some(binding_id) => Some(binding_id),
+            None => return expr.is_string_literal_expr(),
+        },
+    };
+    resolved.is_some_and(|id| typing::is_string(semantic.binding(id), semantic))
+}
+
+/// PLC0207
+///
+/// Reports `expr`, the subscript `value[slice]`, when `value` is a `split`/`rsplit` call on a
+/// string, `slice` is the literal `0` or `-1`, and the call passes no `maxsplit` argument.
+pub(crate) fn missing_maxsplit_arg(checker: &Checker, value: &Expr, slice: &Expr, expr: &Expr) {
+    // Only a subscripted call can have the shape `<string>.split(...)[index]`.
+    let Expr::Call(ExprCall {
+        func, arguments, ..
+    }) = value
+    else {
+        return;
+    };
+
+    // Only the first (`0`) and last (`-1`) elements are of interest. A negative index is
+    // matched as unary minus applied to an integer literal.
+    let (literal, negated) = match slice {
+        Expr::UnaryOp(ExprUnaryOp {
+            op: UnaryOp::USub,
+            operand,
+            ..
+        }) => (operand.as_ref(), true),
+        other => (other, false),
+    };
+    let Expr::NumberLiteral(ExprNumberLiteral {
+        value: Number::Int(magnitude),
+        ..
+    }) = literal
+    else {
+        return;
+    };
+    let index = magnitude.as_i64().map(|n| if negated { -n } else { n });
+    if !matches!(index, Some(0 | -1)) {
+        return;
+    }
+
+    // The callee must be the attribute access `<receiver>.split` or `<receiver>.rsplit`.
+    let Expr::Attribute(ExprAttribute { attr, value, .. }) = func.as_ref() else {
+        return;
+    };
+    if !matches!(attr.as_str(), "split" | "rsplit") {
+        return;
+    }
+
+    // Subscripting a string yields a string, so peel off any number of subscripts to reach
+    // the expression whose type decides whether this is a string method call.
+    let mut receiver = value;
+    while let Expr::Subscript(ExprSubscript { value, .. }) = receiver.as_ref() {
+        receiver = value;
+    }
+    if !is_string(receiver, checker.semantic()) {
+        return;
+    }
+
+    // An explicit `maxsplit`, positional or keyword, means the split is already bounded.
+    if arguments.find_argument_value("maxsplit", 1).is_some() {
+        return;
+    }
+
+    // `maxsplit` may also be supplied through an unpacked dict literal: `**{"maxsplit": 1}`.
+    for Keyword { arg, value, .. } in &*arguments.keywords {
+        // A named keyword such as `sep={...}` is not an unpacking, so its dict value cannot
+        // set `maxsplit`.
+        if arg.is_some() {
+            continue;
+        }
+        let Expr::Dict(ExprDict { items, .. }) = value else {
+            continue;
+        };
+        for DictItem { key, .. } in items {
+            if let Some(Expr::StringLiteral(ExprStringLiteral { value, .. })) = key {
+                if value.to_str() == "maxsplit" {
+                    return;
+                }
+            }
+        }
+    }
+
+    checker.report_diagnostic(Diagnostic::new(MissingMaxsplitArg, expr.range()));
+}
diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs
@@ -46,6 +46,7 @@ pub(crate) use logging::*;
 pub(crate) use magic_value_comparison::*;
 pub(crate) use manual_import_from::*;
 pub(crate) use misplaced_bare_raise::*;
+pub(crate) use missing_maxsplit_arg::*;
 pub(crate) use modified_iterating_set::*;
 pub(crate) use named_expr_without_context::*;
 pub(crate) use nan_comparison::*;
@@ -155,6 +156,7 @@ mod logging;
 mod magic_value_comparison;
 mod manual_import_from;
 mod misplaced_bare_raise;
+mod missing_maxsplit_arg;
 mod modified_iterating_set;
 mod named_expr_without_context;
 mod nan_comparison;