Skip to content

Commit 9df8998

Browse files
fix: ensure accented characters are sorted with their base letters
1 parent 3973d1a commit 9df8998

File tree

4 files changed

+115
-4
lines changed

4 files changed

+115
-4
lines changed

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/main/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/ApiDocumentTransformer.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
import io.gravitee.rest.api.model.v4.api.GenericApiEntity;
2424
import io.gravitee.rest.api.service.ApiService;
2525
import io.gravitee.rest.api.service.impl.search.lucene.DocumentTransformer;
26+
import java.text.CollationKey;
27+
import java.text.Collator;
28+
import java.util.Base64;
29+
import java.util.Locale;
2630
import java.util.regex.Pattern;
2731
import org.apache.lucene.document.Document;
2832
import org.apache.lucene.document.Field;
@@ -77,6 +81,7 @@ public class ApiDocumentTransformer implements DocumentTransformer<GenericApiEnt
7781
public static final String FIELD_HAS_HEALTH_CHECK = "has_health_check";
7882

7983
private ApiService apiService;
84+
private final Collator collator = Collator.getInstance(Locale.ENGLISH);
8085

8186
public ApiDocumentTransformer(@Lazy ApiService apiService) {
8287
this.apiService = apiService;
@@ -214,12 +219,16 @@ private void appendPath(final Document doc, final int[] pathIndex, final String
214219
doc.add(new TextField(FIELD_HOSTS_SPLIT, host, Field.Store.NO));
215220
}
216221
if (pathIndex[0]++ == 0) {
217-
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, new BytesRef(QueryParser.escape(path))));
222+
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, toSortedValue(path)));
218223
}
219224
}
220225

221226
private BytesRef toSortedValue(String value) {
222-
return new BytesRef(SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase());
227+
if (value == null) return new BytesRef("");
228+
String cleaned = SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase(Locale.ENGLISH);
229+
collator.setStrength(Collator.SECONDARY);
230+
CollationKey key = collator.getCollationKey(cleaned);
231+
return new BytesRef(key.toByteArray());
223232
}
224233

225234
@Override

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/main/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/IndexableApiDocumentTransformer.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@
5858
import io.gravitee.definition.model.v4.nativeapi.kafka.KafkaListener;
5959
import io.gravitee.rest.api.model.search.Indexable;
6060
import io.gravitee.rest.api.service.impl.search.lucene.DocumentTransformer;
61+
import java.text.CollationKey;
62+
import java.text.Collator;
63+
import java.util.Base64;
64+
import java.util.Locale;
6165
import org.apache.lucene.document.Document;
6266
import org.apache.lucene.document.Field;
6367
import org.apache.lucene.document.LongPoint;
@@ -71,6 +75,8 @@
7175
@Component
7276
public class IndexableApiDocumentTransformer implements DocumentTransformer<IndexableApi> {
7377

78+
private final Collator collator = Collator.getInstance(Locale.ENGLISH);
79+
7480
@Override
7581
public Document transform(IndexableApi indexableApi) {
7682
var api = indexableApi.getApi();
@@ -231,7 +237,7 @@ private void appendPath(final Document doc, final int[] pathIndex, final String
231237
doc.add(new StringField(FIELD_PATHS, path, Field.Store.NO));
232238
doc.add(new TextField(FIELD_PATHS_SPLIT, path, Field.Store.NO));
233239
if (pathIndex[0]++ == 0) {
234-
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, new BytesRef(QueryParser.escape(path))));
240+
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, toSortedValue(path)));
235241
}
236242
}
237243

@@ -243,7 +249,11 @@ private void appendHost(Document doc, String host) {
243249
}
244250

245251
private BytesRef toSortedValue(String value) {
246-
return new BytesRef(SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase());
252+
if (value == null) return new BytesRef("");
253+
String cleaned = SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase(Locale.ENGLISH);
254+
collator.setStrength(Collator.SECONDARY);
255+
CollationKey key = collator.getCollationKey(cleaned);
256+
return new BytesRef(key.toByteArray());
247257
}
248258

249259
@Override

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/test/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/ApiDocumentTransformerTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,21 @@
2626
import io.gravitee.rest.api.model.UserEntity;
2727
import io.gravitee.rest.api.model.api.ApiEntity;
2828
import io.gravitee.rest.api.service.impl.ApiServiceImpl;
29+
import java.lang.reflect.Field;
30+
import java.lang.reflect.Method;
31+
import java.text.Collator;
32+
import java.util.ArrayList;
2933
import java.util.Arrays;
34+
import java.util.Comparator;
3035
import java.util.Date;
3136
import java.util.HashMap;
3237
import java.util.HashSet;
38+
import java.util.List;
39+
import java.util.Map;
3340
import org.apache.lucene.document.Document;
3441
import org.apache.lucene.document.LongPoint;
3542
import org.apache.lucene.index.IndexableField;
43+
import org.apache.lucene.util.BytesRef;
3644
import org.jetbrains.annotations.NotNull;
3745
import org.junit.Test;
3846
import org.junit.runner.RunWith;
@@ -135,4 +143,26 @@ private void assertDocumentMatchesInputApiEntity(ApiEntity toTransform, Document
135143
assertThat(toTransform.getDefinitionContext().getOrigin()).isEqualTo(transformed.get("origin"));
136144
assertThat("true").isEqualTo(transformed.get("has_health_check"));
137145
}
146+
147+
@Test
148+
public void shouldSortListCorrectlyWithCollatorAndBytesRef() throws Exception {
149+
List<String> names = List.of("nano", "zorro", "äther", "vem", "foo/bar", "épée", "épona", "öko", "bns-one");
150+
List<String> expectedSorted = List.of("äther", "bns-one", "épée", "épona", "foo/bar", "nano", "öko", "vem", "zorro");
151+
Method toSortedValueMethod = ApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
152+
toSortedValueMethod.setAccessible(true);
153+
154+
Field collatorField = ApiDocumentTransformer.class.getDeclaredField("collator");
155+
collatorField.setAccessible(true);
156+
Collator collator = (Collator) collatorField.get(cut);
157+
List<String> sortedByCollator = new ArrayList<>(names);
158+
sortedByCollator.sort(collator);
159+
Map<String, BytesRef> bytesRefMap = new HashMap<>();
160+
for (String name : names) {
161+
bytesRefMap.put(name, (BytesRef) toSortedValueMethod.invoke(cut, name));
162+
}
163+
List<String> sortedByBytesRef = new ArrayList<>(names);
164+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get));
165+
assertThat(sortedByCollator).isEqualTo(expectedSorted);
166+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
167+
}
138168
}

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/test/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/IndexableApiDocumentTransformerTest.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,19 @@
5151
import io.gravitee.apim.core.membership.model.PrimaryOwnerEntity;
5252
import io.gravitee.apim.core.search.model.IndexableApi;
5353
import io.gravitee.definition.model.DefinitionVersion;
54+
import java.lang.reflect.Field;
55+
import java.lang.reflect.Method;
56+
import java.text.Collator;
57+
import java.util.ArrayList;
58+
import java.util.Comparator;
59+
import java.util.HashMap;
5460
import java.util.List;
5561
import java.util.Map;
5662
import java.util.Set;
63+
import java.util.stream.Collectors;
64+
import org.apache.logging.log4j.util.PropertySource;
5765
import org.apache.lucene.index.IndexableField;
66+
import org.apache.lucene.util.BytesRef;
5867
import org.assertj.core.api.SoftAssertions;
5968
import org.junit.jupiter.api.Test;
6069

@@ -295,4 +304,57 @@ void should_transform_a_native_api() {
295304
softly.assertThat(result.getFields(FIELD_HOSTS)).extracting(IndexableField::stringValue).contains("native.kafka");
296305
});
297306
}
307+
308+
@Test
309+
void should_sort_names_by_bytesref() throws Exception {
310+
List<String> names = List.of("nano", "zorro", "äther", "vem", "épée", "épona", "öko", "bns");
311+
List<String> expectedSorted = List.of("äther", "bns", "épée", "épona", "nano", "öko", "vem", "zorro");
312+
313+
Method toSortedValueMethod = IndexableApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
314+
toSortedValueMethod.setAccessible(true);
315+
Map<String, BytesRef> bytesRefMap = new HashMap<>();
316+
for (String name : names) {
317+
BytesRef key = (BytesRef) toSortedValueMethod.invoke(cut, name);
318+
bytesRefMap.put(name, key);
319+
}
320+
List<String> sortedByBytesRef = new ArrayList<>(names);
321+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get, BytesRef::compareTo));
322+
323+
// Also sort with collator directly for comparison
324+
List<String> sortedByCollator = new ArrayList<>(names);
325+
Field collatorField = IndexableApiDocumentTransformer.class.getDeclaredField("collator");
326+
collatorField.setAccessible(true);
327+
Collator collator = (Collator) collatorField.get(cut);
328+
sortedByCollator.sort(collator);
329+
330+
// Assertions
331+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
332+
assertThat(sortedByCollator).isEqualTo(expectedSorted);
333+
}
334+
335+
@Test
336+
void should_sort_names_with_special_characters_correctly() throws Exception {
337+
List<String> names = List.of("épée-bar", "épée", "zorro/name", "äther", "nano");
338+
List<String> expectedSorted = List.of("äther", "épée", "épée-bar", "nano", "zorro/name");
339+
Method toSortedValueMethod = IndexableApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
340+
toSortedValueMethod.setAccessible(true);
341+
Map<String, BytesRef> bytesRefMap = new HashMap<>();
342+
for (String name : names) {
343+
BytesRef key = (BytesRef) toSortedValueMethod.invoke(cut, name);
344+
bytesRefMap.put(name, key);
345+
}
346+
List<String> sortedByBytesRef = new ArrayList<>(names);
347+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get, BytesRef::compareTo));
348+
349+
// Also sort with collator directly for comparison
350+
List<String> sortedByCollator = new ArrayList<>(names);
351+
Field collatorField = IndexableApiDocumentTransformer.class.getDeclaredField("collator");
352+
collatorField.setAccessible(true);
353+
Collator collator = (Collator) collatorField.get(cut);
354+
sortedByCollator.sort(collator);
355+
356+
// Assertions
357+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
358+
assertThat(sortedByCollator).isEqualTo(expectedSorted);
359+
}
298360
}

0 commit comments

Comments
 (0)