Skip to content

Commit 36b201b

Browse files
fix: ensure accented characters are sorted with their base letters
1 parent 752036e commit 36b201b

File tree

4 files changed

+117
-4
lines changed

4 files changed

+117
-4
lines changed

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/main/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/ApiDocumentTransformer.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
import io.gravitee.rest.api.model.v4.api.GenericApiEntity;
2424
import io.gravitee.rest.api.service.ApiService;
2525
import io.gravitee.rest.api.service.impl.search.lucene.DocumentTransformer;
26+
import java.text.CollationKey;
27+
import java.text.Collator;
28+
import java.util.Base64;
29+
import java.util.Locale;
2630
import java.util.regex.Pattern;
2731
import org.apache.lucene.document.Document;
2832
import org.apache.lucene.document.Field;
@@ -77,6 +81,7 @@ public class ApiDocumentTransformer implements DocumentTransformer<GenericApiEnt
7781
public static final String FIELD_HAS_HEALTH_CHECK = "has_health_check";
7882

7983
private ApiService apiService;
84+
private final Collator collator = Collator.getInstance(Locale.ENGLISH);
8085

8186
public ApiDocumentTransformer(@Lazy ApiService apiService) {
8287
this.apiService = apiService;
@@ -214,12 +219,16 @@ private void appendPath(final Document doc, final int[] pathIndex, final String
214219
doc.add(new TextField(FIELD_HOSTS_SPLIT, host, Field.Store.NO));
215220
}
216221
if (pathIndex[0]++ == 0) {
217-
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, new BytesRef(QueryParser.escape(path))));
222+
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, toSortedValue(path)));
218223
}
219224
}
220225

221226
/**
 * Builds the binary value used for sorted fields from a raw string.
 *
 * Diff view: the line under the "-" gutter marker is the removed implementation
 * (strip special characters, then lower-case); the lines under "+" markers are
 * the replacement, which derives the bytes from a collation key instead.
 *
 * @param value raw text to sort on; may be null
 * @return an empty BytesRef for null input, otherwise the collation key bytes
 *         of the value with SPECIAL_CHARS matches removed
 */
private BytesRef toSortedValue(String value) {
222-
return new BytesRef(SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase());
227+
// Null input maps to an empty sort value instead of failing on matcher(null).
if (value == null) return new BytesRef("");
228+
// Drop everything matched by SPECIAL_CHARS before computing the key.
String cleaned = SPECIAL_CHARS.matcher(value).replaceAll("");
229+
// Per the java.text.Collator docs, SECONDARY commonly treats accents as
// significant only after base letters and ignores case differences.
// NOTE(review): the strength is re-applied to the shared collator field on
// every call; setting it once where the field is initialised would avoid
// mutating shared state here - confirm before changing.
collator.setStrength(Collator.SECONDARY);
230+
CollationKey key = collator.getCollationKey(cleaned);
231+
// The key's byte form is stored, so comparing the stored bytes is intended to
// reproduce the collator's ordering (the tests in this commit check this).
return new BytesRef(key.toByteArray());
223232
}
224233

225234
@Override

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/main/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/IndexableApiDocumentTransformer.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@
5858
import io.gravitee.definition.model.v4.nativeapi.kafka.KafkaListener;
5959
import io.gravitee.rest.api.model.search.Indexable;
6060
import io.gravitee.rest.api.service.impl.search.lucene.DocumentTransformer;
61+
import java.text.CollationKey;
62+
import java.text.Collator;
63+
import java.util.Base64;
64+
import java.util.Locale;
6165
import org.apache.lucene.document.Document;
6266
import org.apache.lucene.document.Field;
6367
import org.apache.lucene.document.LongPoint;
@@ -71,6 +75,8 @@
7175
@Component
7276
public class IndexableApiDocumentTransformer implements DocumentTransformer<IndexableApi> {
7377

78+
private final Collator collator = Collator.getInstance(Locale.ENGLISH);
79+
7480
@Override
7581
public Document transform(IndexableApi indexableApi) {
7682
var api = indexableApi.getApi();
@@ -231,7 +237,7 @@ private void appendPath(final Document doc, final int[] pathIndex, final String
231237
doc.add(new StringField(FIELD_PATHS, path, Field.Store.NO));
232238
doc.add(new TextField(FIELD_PATHS_SPLIT, path, Field.Store.NO));
233239
if (pathIndex[0]++ == 0) {
234-
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, new BytesRef(QueryParser.escape(path))));
240+
doc.add(new SortedDocValuesField(FIELD_PATHS_SORTED, toSortedValue(path)));
235241
}
236242
}
237243

@@ -243,7 +249,11 @@ private void appendHost(Document doc, String host) {
243249
}
244250

245251
/**
 * Builds the binary value used for sorted fields from a raw string.
 *
 * Diff view: the line under the "-" gutter marker is the removed implementation
 * (strip special characters, then lower-case); the lines under "+" markers are
 * the replacement, which derives the bytes from a collation key instead.
 *
 * @param value raw text to sort on; may be null
 * @return an empty BytesRef for null input, otherwise the collation key bytes
 *         of the value with SPECIAL_CHARS matches removed
 */
private BytesRef toSortedValue(String value) {
246-
return new BytesRef(SPECIAL_CHARS.matcher(value).replaceAll("").toLowerCase());
252+
// Null input maps to an empty sort value instead of failing on matcher(null).
if (value == null) return new BytesRef("");
253+
// Drop everything matched by SPECIAL_CHARS before computing the key.
String cleaned = SPECIAL_CHARS.matcher(value).replaceAll("");
254+
// Per the java.text.Collator docs, SECONDARY commonly treats accents as
// significant only after base letters and ignores case differences.
// NOTE(review): the strength is re-applied to the shared collator field on
// every call; setting it once where the field is initialised would avoid
// mutating shared state here - confirm before changing.
collator.setStrength(Collator.SECONDARY);
255+
CollationKey key = collator.getCollationKey(cleaned);
256+
// The key's byte form is stored, so comparing the stored bytes is intended to
// reproduce the collator's ordering (the tests in this commit check this).
return new BytesRef(key.toByteArray());
247257
}
248258

249259
@Override

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/test/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/ApiDocumentTransformerTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,21 @@
2626
import io.gravitee.rest.api.model.UserEntity;
2727
import io.gravitee.rest.api.model.api.ApiEntity;
2828
import io.gravitee.rest.api.service.impl.ApiServiceImpl;
29+
import java.lang.reflect.Field;
30+
import java.lang.reflect.Method;
31+
import java.text.Collator;
32+
import java.util.ArrayList;
2933
import java.util.Arrays;
34+
import java.util.Comparator;
3035
import java.util.Date;
3136
import java.util.HashMap;
3237
import java.util.HashSet;
38+
import java.util.List;
39+
import java.util.Map;
3340
import org.apache.lucene.document.Document;
3441
import org.apache.lucene.document.LongPoint;
3542
import org.apache.lucene.index.IndexableField;
43+
import org.apache.lucene.util.BytesRef;
3644
import org.jetbrains.annotations.NotNull;
3745
import org.junit.Test;
3846
import org.junit.runner.RunWith;
@@ -135,4 +143,26 @@ private void assertDocumentMatchesInputApiEntity(ApiEntity toTransform, Document
135143
assertThat(toTransform.getDefinitionContext().getOrigin()).isEqualTo(transformed.get("origin"));
136144
assertThat("true").isEqualTo(transformed.get("has_health_check"));
137145
}
146+
147+
/**
 * Verifies that names containing accents, mixed case and the characters "/" and
 * "-" end up in the same expected order whether they are sorted directly with
 * the transformer's collator or by the BytesRef values produced by the private
 * toSortedValue method. Both private members are reached through reflection.
 */
@Test
148+
public void shouldSortListCorrectlyWithCollatorAndBytesRef() throws Exception {
149+
List<String> names = List.of("nano", "Zorro", "äther", "vem", "foo/bar", "Épée", "épona", "öko", "bns-One");
150+
// Expected order keeps accented letters next to their base letters and does
// not let upper/lower case decide the position.
List<String> expectedSorted = List.of("äther", "bns-One", "Épée", "épona", "foo/bar", "nano", "öko", "vem", "Zorro");
151+
// toSortedValue is private, so it is looked up and opened reflectively.
Method toSortedValueMethod = ApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
152+
toSortedValueMethod.setAccessible(true);
153+
154+
// Read the private collator instance held by the transformer under test.
Field collatorField = ApiDocumentTransformer.class.getDeclaredField("collator");
155+
collatorField.setAccessible(true);
156+
Collator collator = (Collator) collatorField.get(cut);
157+
// Reference ordering: sort the raw names (special characters included) with the collator.
// NOTE(review): within this test no toSortedValue call has happened yet, so the
// collator strength is whatever `cut` currently carries - confirm this is intended.
List<String> sortedByCollator = new ArrayList<>(names);
158+
sortedByCollator.sort(collator);
159+
// Compute the sort value of every name through the method under test.
Map<String, BytesRef> bytesRefMap = new HashMap<>();
160+
for (String name : names) {
161+
bytesRefMap.put(name, (BytesRef) toSortedValueMethod.invoke(cut, name));
162+
}
163+
// Order the names by their BytesRef value using BytesRef's natural ordering.
List<String> sortedByBytesRef = new ArrayList<>(names);
164+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get));
165+
// Both orderings must match the hand-written expectation.
assertThat(sortedByCollator).isEqualTo(expectedSorted);
166+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
167+
}
138168
}

gravitee-apim-rest-api/gravitee-apim-rest-api-service/src/test/java/io/gravitee/rest/api/service/impl/search/lucene/transformer/IndexableApiDocumentTransformerTest.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import static io.gravitee.rest.api.service.impl.search.lucene.transformer.ApiDocumentTransformer.FIELD_TAGS;
4343
import static io.gravitee.rest.api.service.impl.search.lucene.transformer.ApiDocumentTransformer.FIELD_TAGS_SPLIT;
4444
import static io.gravitee.rest.api.service.impl.search.lucene.transformer.ApiDocumentTransformer.FIELD_TYPE;
45+
import static io.gravitee.rest.api.service.impl.search.lucene.transformer.ApiDocumentTransformer.SPECIAL_CHARS;
4546
import static org.assertj.core.api.Assertions.assertThat;
4647
import static org.assertj.core.api.Assertions.catchThrowable;
4748

@@ -51,10 +52,20 @@
5152
import io.gravitee.apim.core.membership.model.PrimaryOwnerEntity;
5253
import io.gravitee.apim.core.search.model.IndexableApi;
5354
import io.gravitee.definition.model.DefinitionVersion;
55+
import java.lang.reflect.Field;
56+
import java.lang.reflect.Method;
57+
import java.text.Collator;
58+
import java.util.ArrayList;
59+
import java.util.Comparator;
60+
import java.util.HashMap;
5461
import java.util.List;
62+
import java.util.Locale;
5563
import java.util.Map;
5664
import java.util.Set;
65+
import java.util.stream.Collectors;
66+
import org.apache.logging.log4j.util.PropertySource;
5767
import org.apache.lucene.index.IndexableField;
68+
import org.apache.lucene.util.BytesRef;
5869
import org.assertj.core.api.SoftAssertions;
5970
import org.junit.jupiter.api.Test;
6071

@@ -295,4 +306,57 @@ void should_transform_a_native_api() {
295306
softly.assertThat(result.getFields(FIELD_HOSTS)).extracting(IndexableField::stringValue).contains("native.kafka");
296307
});
297308
}
309+
310+
/**
 * Verifies that names with accents and mixed case sort into the expected order
 * both by the BytesRef values returned from the private toSortedValue method
 * and by the transformer's own collator. Both are reached through reflection.
 */
@Test
311+
void should_sort_names_by_bytesref() throws Exception {
312+
List<String> names = List.of("Nano", "zorro", "äther", "Vem", "épée", "épona", "Öko", "bns");
313+
// Expected order keeps accented letters next to their base letters and does
// not let upper/lower case decide the position.
List<String> expectedSorted = List.of("äther", "bns", "épée", "épona", "Nano", "Öko", "Vem", "zorro");
314+
315+
// toSortedValue is private, so it is looked up and opened reflectively.
Method toSortedValueMethod = IndexableApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
316+
toSortedValueMethod.setAccessible(true);
317+
// Compute the sort value of every name through the method under test.
Map<String, BytesRef> bytesRefMap = new HashMap<>();
318+
for (String name : names) {
319+
BytesRef key = (BytesRef) toSortedValueMethod.invoke(cut, name);
320+
bytesRefMap.put(name, key);
321+
}
322+
// Order the names by their BytesRef value using BytesRef.compareTo.
List<String> sortedByBytesRef = new ArrayList<>(names);
323+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get, BytesRef::compareTo));
324+
325+
// Reference ordering: sort the raw names with the transformer's private collator.
// toSortedValue has already been invoked on `cut` above, which sets that
// collator to SECONDARY strength before it is used here.
326+
List<String> sortedByCollator = new ArrayList<>(names);
327+
Field collatorField = IndexableApiDocumentTransformer.class.getDeclaredField("collator");
328+
collatorField.setAccessible(true);
329+
Collator collator = (Collator) collatorField.get(cut);
330+
sortedByCollator.sort(collator);
331+
332+
// Both orderings must match the hand-written expectation.
333+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
334+
assertThat(sortedByCollator).isEqualTo(expectedSorted);
335+
}
336+
337+
/**
 * Verifies the expected order for names that additionally contain "-" and "/",
 * both when sorted by the BytesRef values returned from the private
 * toSortedValue method and when sorted with the transformer's own collator.
 * Presumably "-" and "/" are matched by SPECIAL_CHARS and stripped before the
 * key is built; the pattern itself is not visible in this diff - verify.
 */
@Test
338+
void should_sort_names_with_special_characters_correctly() throws Exception {
339+
List<String> names = List.of("épée-bar", "épée", "zorro/name", "äther", "nano");
340+
// "épée" is expected before "épée-bar": the shorter name sorts ahead of its extension.
List<String> expectedSorted = List.of("äther", "épée", "épée-bar", "nano", "zorro/name");
341+
// toSortedValue is private, so it is looked up and opened reflectively.
Method toSortedValueMethod = IndexableApiDocumentTransformer.class.getDeclaredMethod("toSortedValue", String.class);
342+
toSortedValueMethod.setAccessible(true);
343+
// Compute the sort value of every name through the method under test.
Map<String, BytesRef> bytesRefMap = new HashMap<>();
344+
for (String name : names) {
345+
BytesRef key = (BytesRef) toSortedValueMethod.invoke(cut, name);
346+
bytesRefMap.put(name, key);
347+
}
348+
// Order the names by their BytesRef value using BytesRef.compareTo.
List<String> sortedByBytesRef = new ArrayList<>(names);
349+
sortedByBytesRef.sort(Comparator.comparing(bytesRefMap::get, BytesRef::compareTo));
350+
351+
// Reference ordering: sort the raw names (special characters still present)
// with the transformer's private collator. toSortedValue has already been
// invoked on `cut` above, which sets that collator to SECONDARY strength.
352+
List<String> sortedByCollator = new ArrayList<>(names);
353+
Field collatorField = IndexableApiDocumentTransformer.class.getDeclaredField("collator");
354+
collatorField.setAccessible(true);
355+
Collator collator = (Collator) collatorField.get(cut);
356+
sortedByCollator.sort(collator);
357+
358+
// Both orderings must match the hand-written expectation.
359+
assertThat(sortedByBytesRef).isEqualTo(expectedSorted);
360+
assertThat(sortedByCollator).isEqualTo(expectedSorted);
361+
}
298362
}

0 commit comments

Comments
 (0)