Skip to content

Commit 419079a

Browse files
committed
🚸(backend) make document search on title accent-insensitive
This should work in both cases: - search for "vélo" when the document title contains "velo" - search for "velo" when the document title contains "vélo"
1 parent ecd0656 commit 419079a

File tree

4 files changed

+56
-7
lines changed

4 files changed

+56
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to
1010

1111
## Added
1212

13+
- 🚸(backend) make document search on title accent-insensitive #874
1314
- 🚩 add homepage feature flag #861
1415

1516

src/backend/core/api/filters.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,50 @@
11
"""API filters for Impress' core application."""
22

3+
import unicodedata
4+
35
from django.utils.translation import gettext_lazy as _
46

57
import django_filters
68

79
from core import models
810

911

12+
def remove_accents(value):
    """
    Remove accents from a string (vélo -> velo).

    The string is decomposed with Unicode NFD normalization, which splits an
    accented character into its base character followed by combining marks,
    and every character in the "Mn" (nonspacing mark) category is dropped.
    Characters that have no canonical decomposition are left untouched.

    Args:
        value: The string to strip accents from.
    Returns:
        The string without combining diacritical marks.
    """
    return "".join(
        c
        for c in unicodedata.normalize("NFD", value)
        if unicodedata.category(c) != "Mn"
    )
19+
20+
21+
class AccentInsensitiveCharFilter(django_filters.CharFilter):
    """
    A custom CharFilter that filters on the accent-insensitive value searched.

    The searched value has its accents removed with `remove_accents` before
    being handed to the regular CharFilter logic.
    """

    def filter(self, qs, value):
        """
        Apply the filter to the queryset using the unaccented version of the field.

        Args:
            qs: The queryset to filter.
            value: The value to search for in the unaccented field.
        Returns:
            A filtered queryset.
        """
        # Falsy values (empty string, None) are passed through unchanged so the
        # parent class decides how to handle them.
        # NOTE(review): Django's `unaccent` transform is believed to be
        # bilateral (also applied to the searched value), which may make this
        # Python-side stripping redundant -- confirm before relying on it.
        if value:
            value = remove_accents(value)
        return super().filter(qs, value)
39+
40+
1041
class DocumentFilter(django_filters.FilterSet):
1142
"""
12-
Custom filter for filtering documents.
43+
Custom filter for filtering documents on title (accent and case insensitive).
1344
"""
1445

15-
title = django_filters.CharFilter(
16-
field_name="title", lookup_expr="icontains", label=_("Title")
46+
title = AccentInsensitiveCharFilter(
47+
field_name="title", lookup_expr="unaccent__icontains", label=_("Title")
1748
)
1849

1950
class Meta:
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from django.contrib.postgres.operations import UnaccentExtension
2+
from django.db import migrations
3+
4+
5+
class Migration(migrations.Migration):
    """Run the `UnaccentExtension` operation from `django.contrib.postgres`."""

    dependencies = [
        ("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
    ]

    operations = [UnaccentExtension()]

src/backend/core/tests/documents/test_api_documents_descendants_filters.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from rest_framework.test import APIClient
88

99
from core import factories
10+
from core.api.filters import remove_accents
1011

1112
fake = Faker()
1213
pytestmark = pytest.mark.django_db
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
4950
[
5051
("Project Alpha", 1), # Exact match
5152
("project", 2), # Partial match (case-insensitive)
52-
("Guide", 1), # Word match within a title
53+
("Guide", 2), # Word match within a title
5354
("Special", 0), # No match (nonexistent keyword)
5455
("2024", 2), # Match by numeric keyword
55-
("", 5), # Empty string
56+
("", 6), # Empty string
57+
("velo", 1), # Accent-insensitive match (velo vs vélo)
58+
("bêta", 1), # Accent-insensitive match (bêta vs beta)
5659
],
5760
)
5861
def test_api_documents_descendants_filter_title(query, nb_results):
59-
"""Authenticated users should be able to search documents by their title."""
62+
"""Authenticated users should be able to search documents by their unaccented title."""
6063
user = factories.UserFactory()
6164
client = APIClient()
6265
client.force_login(user)
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
7073
"User Guide",
7174
"Financial Report 2024",
7275
"Annual Review 2024",
76+
"Guide du vélo urbain", # <-- Title with accent for accent-insensitive test
7377
]
7478
for title in titles:
7579
factories.DocumentFactory(title=title, parent=document)
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
8589

8690
# Ensure all results contain the query in their title
8791
for result in results:
88-
assert query.lower().strip() in result["title"].lower()
92+
assert (
93+
remove_accents(query).lower().strip()
94+
in remove_accents(result["title"]).lower()
95+
)

0 commit comments

Comments
 (0)