Skip to content

[pylint] Implement missing-maxsplit-arg (PLC0207) #17454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8248b80
`[pylint]` Implement `missing-maxsplit-arg` (`PLC0207`)
vjurczenia Apr 16, 2025
38a9c81
Move rule to preview group
vjurczenia Apr 19, 2025
347e9b9
Use `find_argument_value` to check maxsplit value
vjurczenia Apr 19, 2025
5bec68b
Refactor to import required items from ast module instead of whole mo…
vjurczenia Apr 19, 2025
3595079
cargo fmt
vjurczenia Apr 19, 2025
e6e2bfe
Update tests
vjurczenia Apr 20, 2025
1f34119
Remove check of maxsplit value (aligned with Pylint)
vjurczenia Apr 20, 2025
df38ac6
Add functionality to check class members of type string
vjurczenia Apr 23, 2025
9705675
Remove unnecessary assignments from test
vjurczenia Apr 24, 2025
1d3502c
Add functionality to check sliced strings
vjurczenia Apr 24, 2025
8967bc8
Update snapshot
vjurczenia Apr 24, 2025
a61c622
Enhance sliced strings check to include chained slices
vjurczenia Apr 24, 2025
7342bbc
Add tests for class attribute named split (from Pylint)
vjurczenia Apr 24, 2025
d1bb448
Add functionality to check kwargs set via an unpacked dict literal
vjurczenia Apr 28, 2025
dfbde3e
Remove incorrect comments from test file
vjurczenia Apr 28, 2025
5053354
Add remaining test cases
vjurczenia Apr 28, 2025
b67bc21
Action comments
vjurczenia May 15, 2025
77cd518
Remove incorrect hyphen from comment
vjurczenia May 15, 2025
378de24
Add TODO test comments
vjurczenia May 16, 2025
5997aa7
Action comments
vjurczenia May 19, 2025
f7f6fbb
Merge branch 'main' into vjurczenia/missing_maxsplit_arg
ntBre May 28, 2025
98a0f50
cargo fmt
ntBre May 28, 2025
ab261cb
tidy up after merge
ntBre May 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
SEQ = "1,2,3"

class Foo(str):
class_str = "1,2,3"

def split(self, sep=None, maxsplit=-1) -> list[str]:
return super().split(sep, maxsplit)

class Bar():
split = "1,2,3"

# Errors
## Test split called directly on string literal
"1,2,3".split(",")[0] # [missing-maxsplit-arg]
"1,2,3".split(",")[-1] # [missing-maxsplit-arg]
"1,2,3".rsplit(",")[0] # [missing-maxsplit-arg]
"1,2,3".rsplit(",")[-1] # [missing-maxsplit-arg]

## Test split called on string variable
SEQ.split(",")[0] # [missing-maxsplit-arg]
SEQ.split(",")[-1] # [missing-maxsplit-arg]
SEQ.rsplit(",")[0] # [missing-maxsplit-arg]
SEQ.rsplit(",")[-1] # [missing-maxsplit-arg]

## Test split called on class attribute
Foo.class_str.split(",")[0] # [missing-maxsplit-arg]
Foo.class_str.split(",")[-1] # [missing-maxsplit-arg]
Foo.class_str.rsplit(",")[0] # [missing-maxsplit-arg]
Foo.class_str.rsplit(",")[-1] # [missing-maxsplit-arg]

## Test split called on sliced string
"1,2,3"[::-1].split(",")[0] # [missing-maxsplit-arg]
"1,2,3"[::-1][::-1].split(",")[0] # [missing-maxsplit-arg]
SEQ[:3].split(",")[0] # [missing-maxsplit-arg]
Foo.class_str[1:3].split(",")[-1] # [missing-maxsplit-arg]
"1,2,3"[::-1].rsplit(",")[0] # [missing-maxsplit-arg]
SEQ[:3].rsplit(",")[0] # [missing-maxsplit-arg]
Foo.class_str[1:3].rsplit(",")[-1] # [missing-maxsplit-arg]

## Test sep given as named argument
"1,2,3".split(sep=",")[0] # [missing-maxsplit-arg]
"1,2,3".split(sep=",")[-1] # [missing-maxsplit-arg]
"1,2,3".rsplit(sep=",")[0] # [missing-maxsplit-arg]
"1,2,3".rsplit(sep=",")[-1] # [missing-maxsplit-arg]

## Special cases
"1,2,3".split("\n")[0] # [missing-maxsplit-arg]
"1,2,3".split("split")[-1] # [missing-maxsplit-arg]
"1,2,3".rsplit("rsplit")[0] # [missing-maxsplit-arg]

## Test class attribute named split
Bar.split.split(",")[0] # [missing-maxsplit-arg]
Bar.split.split(",")[-1] # [missing-maxsplit-arg]
Bar.split.rsplit(",")[0] # [missing-maxsplit-arg]
Bar.split.rsplit(",")[-1] # [missing-maxsplit-arg]

## Test unpacked dict literal kwargs
"1,2,3".split(**{"sep": ","})[0] # [missing-maxsplit-arg]


# OK
## Test not accessing the first or last element
### Test split called directly on string literal
"1,2,3".split(",")[1]
"1,2,3".split(",")[-2]
"1,2,3".rsplit(",")[1]
"1,2,3".rsplit(",")[-2]

### Test split called on string variable
SEQ.split(",")[1]
SEQ.split(",")[-2]
SEQ.rsplit(",")[1]
SEQ.rsplit(",")[-2]

### Test split called on class attribute
Foo.class_str.split(",")[1]
Foo.class_str.split(",")[-2]
Foo.class_str.rsplit(",")[1]
Foo.class_str.rsplit(",")[-2]

### Test split called on sliced string
"1,2,3"[::-1].split(",")[1]
SEQ[:3].split(",")[1]
Foo.class_str[1:3].split(",")[-2]
"1,2,3"[::-1].rsplit(",")[1]
SEQ[:3].rsplit(",")[1]
Foo.class_str[1:3].rsplit(",")[-2]

### Test sep given as named argument
"1,2,3".split(sep=",")[1]
"1,2,3".split(sep=",")[-2]
"1,2,3".rsplit(sep=",")[1]
"1,2,3".rsplit(sep=",")[-2]

## Test varying maxsplit argument
### str.split() tests
"1,2,3".split(sep=",", maxsplit=1)[-1]
"1,2,3".split(sep=",", maxsplit=1)[0]
"1,2,3".split(sep=",", maxsplit=2)[-1]
"1,2,3".split(sep=",", maxsplit=2)[0]
"1,2,3".split(sep=",", maxsplit=2)[1]

### str.rsplit() tests
"1,2,3".rsplit(sep=",", maxsplit=1)[-1]
"1,2,3".rsplit(sep=",", maxsplit=1)[0]
"1,2,3".rsplit(sep=",", maxsplit=2)[-1]
"1,2,3".rsplit(sep=",", maxsplit=2)[0]
"1,2,3".rsplit(sep=",", maxsplit=2)[1]

## Test user-defined split
Foo("1,2,3").split(",")[0]
Foo("1,2,3").split(",")[-1]
Foo("1,2,3").rsplit(",")[0]
Foo("1,2,3").rsplit(",")[-1]

## Test split called on sliced list
["1", "2", "3"][::-1].split(",")[0]

## Test class attribute named split
Bar.split[0]
Bar.split[-1]
Bar.split[0]
Bar.split[-1]

## Test unpacked dict literal kwargs
"1,2,3".split(",", **{"maxsplit": 1})[0]
"1,2,3".split(**{"sep": ",", "maxsplit": 1})[0]


# TODO

## Test variable split result index
## TODO: These require the ability to resolve a variable name to a value
# Errors
result_index = 0
"1,2,3".split(",")[result_index] # TODO: [missing-maxsplit-arg]
result_index = -1
"1,2,3".split(",")[result_index] # TODO: [missing-maxsplit-arg]
# OK
result_index = 1
"1,2,3".split(",")[result_index]
result_index = -2
"1,2,3".split(",")[result_index]


## Test split result index modified in loop
## TODO: These require the ability to recognize being in a loop where:
## - the result of split called on a string is indexed by a variable
## - the variable index above is modified
# OK
result_index = 0
for j in range(3):
print(SEQ.split(",")[result_index])
result_index = result_index + 1


## Test accessor
## TODO: These require the ability to get the return type of a method
## (possibly via `typing::is_string`)
class Baz():
def __init__(self):
self.my_str = "1,2,3"

def get_string(self) -> str:
return self.my_str

# Errors
Baz().get_string().split(",")[0] # TODO: [missing-maxsplit-arg]
Baz().get_string().split(",")[-1] # TODO: [missing-maxsplit-arg]
# OK
Baz().get_string().split(",")[1]
Baz().get_string().split(",")[-2]


## Test unpacked dict instance kwargs
## TODO: These require the ability to resolve a dict variable name to a value
# Errors
kwargs_without_maxsplit = {"sep": ","}  # key must be `sep`; any other keyword makes `str.split` raise TypeError
"1,2,3".split(**kwargs_without_maxsplit)[0]  # TODO: [missing-maxsplit-arg]
# OK
kwargs_with_maxsplit = {"maxsplit": 1}
"1,2,3".split(",", **kwargs_with_maxsplit)[0]  # TODO: false positive
kwargs_with_maxsplit = {"sep": ",", "maxsplit": 1}
"1,2,3".split(**kwargs_with_maxsplit)[0]  # TODO: false positive
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ pub(crate) fn expression(expr: &Expr, checker: &Checker) {
if checker.enabled(Rule::Airflow3Removal) {
airflow::rules::airflow_3_removal_expr(checker, expr);
}
if checker.enabled(Rule::MissingMaxsplitArg) {
pylint::rules::missing_maxsplit_arg(checker, value, slice, expr);
}
pandas_vet::rules::subscript(checker, value, expr);
}
Expr::Tuple(ast::ExprTuple {
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "C0132") => (RuleGroup::Stable, rules::pylint::rules::TypeParamNameMismatch),
(Pylint, "C0205") => (RuleGroup::Stable, rules::pylint::rules::SingleStringSlots),
(Pylint, "C0206") => (RuleGroup::Stable, rules::pylint::rules::DictIndexMissingItems),
(Pylint, "C0207") => (RuleGroup::Preview, rules::pylint::rules::MissingMaxsplitArg),
(Pylint, "C0208") => (RuleGroup::Stable, rules::pylint::rules::IterationOverSet),
(Pylint, "C0414") => (RuleGroup::Stable, rules::pylint::rules::UselessImportAlias),
(Pylint, "C0415") => (RuleGroup::Preview, rules::pylint::rules::ImportOutsideTopLevel),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ mod tests {
Path::new("bad_staticmethod_argument.py")
)]
#[test_case(Rule::LenTest, Path::new("len_as_condition.py"))]
#[test_case(Rule::MissingMaxsplitArg, Path::new("missing_maxsplit_arg.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
134 changes: 134 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/missing_maxsplit_arg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{
DictItem, Expr, ExprAttribute, ExprCall, ExprDict, ExprNumberLiteral, ExprStringLiteral,
ExprSubscript, ExprUnaryOp, Keyword, Number, UnaryOp,
};
use ruff_python_semantic::{analyze::typing, SemanticModel};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for access to the first or last element of `str.split()` or
/// `str.rsplit()` without `maxsplit=1`.
///
/// ## Why is this bad?
/// Calling `str.split()` or `str.rsplit()` without `maxsplit` set splits on
/// every delimiter in the string. When accessing only the first or last
/// element of the result, it would be more efficient to only split once.
///
/// ## Example
/// ```python
/// url = "www.example.com"
/// prefix = url.split(".")[0]
/// ```
///
/// Use instead:
/// ```python
/// url = "www.example.com"
/// prefix = url.split(".", maxsplit=1)[0]
/// ```
///
/// To access the last element, split from the right with `str.rsplit()` so
/// that the result is unchanged:
/// ```python
/// url = "www.example.com"
/// suffix = url.rsplit(".", maxsplit=1)[-1]
/// ```
#[derive(ViolationMetadata)]
pub(crate) struct MissingMaxsplitArg;

// `MissingMaxsplitArg` is a unit struct, so the diagnostic text below is the
// same for every report; there is nothing to interpolate.
impl Violation for MissingMaxsplitArg {
// NOTE(review): `derive_message_formats` is a project macro applied to this
// method; it presumably inspects the body's final expression, so keep it as a
// plain string literal followed by `.to_string()` — TODO confirm before
// restructuring.
#[derive_message_formats]
fn message(&self) -> String {
"Accessing only the first or last element of `str.split()` without setting `maxsplit=1`"
.to_string()
}
}

/// Returns `true` if `expr` can be identified as a string.
///
/// The expression is classified by shape:
/// - a bare name is a string only when it has a single resolvable binding and
///   [`typing::is_string`] accepts that binding;
/// - any other expression that `lookup_attribute` resolves to a binding is
///   judged by [`typing::is_string`] on that binding;
/// - everything else is a string only when it is a string literal expression.
fn is_string(expr: &Expr, semantic: &SemanticModel) -> bool {
    match expr {
        // A name never falls through to the literal check below.
        Expr::Name(name) => semantic
            .only_binding(name)
            .is_some_and(|id| typing::is_string(semantic.binding(id), semantic)),
        other => match semantic.lookup_attribute(other) {
            Some(id) => typing::is_string(semantic.binding(id), semantic),
            None => other.is_string_literal_expr(),
        },
    }
}

/// PLC0207
///
/// Reports `expr` when it has the shape `<string>.split(...)[i]` or
/// `<string>.rsplit(...)[i]`, where `i` is the literal `0` or `-1` and no
/// `maxsplit` argument is passed.
///
/// - `value` is the subscripted expression (expected to be the call).
/// - `slice` is the subscript index.
/// - `expr` is the whole subscript expression; its range is used for the
///   diagnostic.
///
/// `maxsplit` counts as passed when it is given positionally (second
/// argument), as a keyword, or as a string key of a dict literal unpacked with
/// `**` in the call.
pub(crate) fn missing_maxsplit_arg(checker: &Checker, value: &Expr, slice: &Expr, expr: &Expr) {
    // Check the sliced expression is a function
    let Expr::Call(ExprCall {
        func, arguments, ..
    }) = value
    else {
        return;
    };

    // Check the slice index is either 0 or -1 (first or last value).
    // A negative literal is parsed as unary minus applied to a non-negative
    // literal, so both shapes have to be handled.
    let index = match slice {
        Expr::NumberLiteral(ExprNumberLiteral {
            value: Number::Int(number_value),
            ..
        }) => number_value.as_i64(),
        Expr::UnaryOp(ExprUnaryOp {
            op: UnaryOp::USub,
            operand,
            ..
        }) => match operand.as_ref() {
            Expr::NumberLiteral(ExprNumberLiteral {
                value: Number::Int(number_value),
                ..
            }) => number_value.as_i64().map(|number| -number),
            _ => return,
        },
        _ => return,
    };

    if !matches!(index, Some(0 | -1)) {
        return;
    }

    let Expr::Attribute(ExprAttribute { attr, value, .. }) = func.as_ref() else {
        return;
    };

    // Check the function is "split" or "rsplit"
    if !matches!(attr.as_str(), "split" | "rsplit") {
        return;
    }

    // A subscripted value could technically be subscripted further ad
    // infinitum (e.g. `s[::-1][1:]`), so peel off subscripts until we reach the
    // value that is ultimately being sliced.
    let mut target_instance = value;
    while let Expr::Subscript(ExprSubscript { value, .. }) = target_instance.as_ref() {
        target_instance = value;
    }

    // Check the function is called on a string
    if !is_string(target_instance, checker.semantic()) {
        return;
    }

    // Check the function does not have maxsplit set
    if arguments.find_argument_value("maxsplit", 1).is_some() {
        return;
    }

    // Check maxsplit kwarg not set via unpacked dict literal, e.g.
    // `split(**{"maxsplit": 1})`. Only `**` unpacking (a keyword without a
    // name) can supply `maxsplit`; a dict literal passed as the value of a
    // *named* keyword must not suppress the diagnostic.
    let maxsplit_in_unpacked_dict = arguments.keywords.iter().any(|keyword| {
        let Keyword {
            arg: None,
            value: Expr::Dict(ExprDict { items, .. }),
            ..
        } = keyword
        else {
            return false;
        };

        items.iter().any(|DictItem { key, .. }| {
            matches!(
                key,
                Some(Expr::StringLiteral(ExprStringLiteral { value, .. }))
                    if value.to_str() == "maxsplit"
            )
        })
    });
    if maxsplit_in_unpacked_dict {
        return;
    }

    checker.report_diagnostic(Diagnostic::new(MissingMaxsplitArg, expr.range()));
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub(crate) use logging::*;
pub(crate) use magic_value_comparison::*;
pub(crate) use manual_import_from::*;
pub(crate) use misplaced_bare_raise::*;
pub(crate) use missing_maxsplit_arg::*;
pub(crate) use modified_iterating_set::*;
pub(crate) use named_expr_without_context::*;
pub(crate) use nan_comparison::*;
Expand Down Expand Up @@ -155,6 +156,7 @@ mod logging;
mod magic_value_comparison;
mod manual_import_from;
mod misplaced_bare_raise;
mod missing_maxsplit_arg;
mod modified_iterating_set;
mod named_expr_without_context;
mod nan_comparison;
Expand Down
Loading