Skip to content

Commit 5ae12c6

Browse files
[8.19] ESQL: Fix FieldAttribute name usage in InferNonNullAggConstraint (#128910) (#129269)
* ESQL: Fix FieldAttribute name usage in InferNonNullAggConstraint (#128910) * Fix InferNonNullAggConstraint with union types * Begin fixing LucenePushdownPredicates with union types * Introduce a dedicated wrapper record FieldName to be used where field names are really required. The fixes consist of using FieldAttribute.fieldName() instead of .name() or .field().name(). .name() can be some temporary string unrelated to the actual name of the Lucene index field, whereas .field().name() doesn't know about parent fields; .fieldName() gives the full field name (from the root of the document). The biggest offender of such misuse is SearchStats; make this always require a FieldName, not a String - and make FieldAttribute#fieldName handily return an instance of FieldName so users of SearchStats don't accidentally use the return value of FieldAttribute#name. (cherry picked from commit 0850bd7) # Conflicts: # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java * Fix some more conflicts * [CI] Auto commit changes from spotless --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 0b29de3 commit 5ae12c6

File tree

20 files changed

+245
-138
lines changed

20 files changed

+245
-138
lines changed

docs/changelog/128910.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128910
2+
summary: Fix `FieldAttribute` name usage in `InferNonNullAggConstraint`
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,24 @@
2727

2828
/**
2929
* Attribute for an ES field.
30-
* To differentiate between the different type of fields this class offers:
31-
* - name - the fully qualified name (foo.bar.tar)
32-
* - path - the path pointing to the field name (foo.bar)
33-
* - parent - the immediate parent of the field; useful for figuring out the type of field (nested vs object)
34-
* - nestedParent - if nested, what's the parent (which might not be the immediate one)
30+
* This class offers:
31+
* - name - the name of the attribute, but not necessarily of the field.
32+
* - The raw EsField representing the field; for parent.child.grandchild this is just grandchild.
33+
* - parentName - the full path to the immediate parent of the field, e.g. parent.child (without .grandchild)
34+
*
35+
* To adequately represent e.g. union types, the name of the attribute can be altered because we may have multiple synthetic field
36+
* attributes that really belong to the same underlying field. For instance, if a multi-typed field is used both as {@code field::string}
37+
* and {@code field::ip}, we'll generate 2 field attributes called {@code $$field$converted_to$string} and {@code $$field$converted_to$ip}
38+
* but still referring to the same underlying field.
3539
*/
3640
public class FieldAttribute extends TypedAttribute {
3741

42+
/**
43+
* A field name, as found in the mapping. Includes the whole path from the root of the document.
44+
* Implemented as a wrapper around {@link String} to distinguish from the attribute name (which sometimes differs!) at compile time.
45+
*/
46+
public record FieldName(String string) {};
47+
3848
static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3949
Attribute.class,
4050
"FieldAttribute",
@@ -43,6 +53,7 @@ public class FieldAttribute extends TypedAttribute {
4353

4454
private final String parentName;
4555
private final EsField field;
56+
protected FieldName lazyFieldName;
4657

4758
public FieldAttribute(Source source, String name, EsField field) {
4859
this(source, null, name, field);
@@ -184,15 +195,19 @@ public String parentName() {
184195
/**
185196
* The full name of the field in the index, including all parent fields. E.g. {@code parent.subfield.this_field}.
186197
*/
187-
public String fieldName() {
188-
// Before 8.15, the field name was the same as the attribute's name.
189-
// On later versions, the attribute can be renamed when creating synthetic attributes.
190-
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by their
191-
// name starting with `$$`.
192-
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
193-
return name();
198+
public FieldName fieldName() {
199+
if (lazyFieldName == null) {
200+
// Before 8.15, the field name was the same as the attribute's name.
201+
// On later versions, the attribute can be renamed when creating synthetic attributes.
202+
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by
203+
// their
204+
// name starting with `$$`.
205+
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
206+
lazyFieldName = new FieldName(name());
207+
}
208+
lazyFieldName = new FieldName(Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName());
194209
}
195-
return Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName();
210+
return lazyFieldName;
196211
}
197212

198213
public EsField.Exact getExactInfo() {

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public String getWriteableName() {
116116
}
117117

118118
/**
119-
* Returns the field path
119+
* Returns the simple name, but not the full field path. The latter requires knowing the path of the parent field.
120120
*/
121121
public String getName() {
122122
return name;

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import org.elasticsearch.xpack.esql.core.expression.Attribute;
5151
import org.elasticsearch.xpack.esql.core.expression.Expression;
5252
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
53+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName;
5354
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
5455
import org.elasticsearch.xpack.esql.core.expression.Literal;
5556
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
@@ -239,22 +240,22 @@ public static EsRelation relation() {
239240
public static class TestSearchStats implements SearchStats {
240241

241242
@Override
242-
public boolean exists(String field) {
243+
public boolean exists(FieldName field) {
243244
return true;
244245
}
245246

246247
@Override
247-
public boolean isIndexed(String field) {
248+
public boolean isIndexed(FieldName field) {
248249
return exists(field);
249250
}
250251

251252
@Override
252-
public boolean hasDocValues(String field) {
253+
public boolean hasDocValues(FieldName field) {
253254
return exists(field);
254255
}
255256

256257
@Override
257-
public boolean hasExactSubfield(String field) {
258+
public boolean hasExactSubfield(FieldName field) {
258259
return exists(field);
259260
}
260261

@@ -264,32 +265,32 @@ public long count() {
264265
}
265266

266267
@Override
267-
public long count(String field) {
268+
public long count(FieldName field) {
268269
return exists(field) ? -1 : 0;
269270
}
270271

271272
@Override
272-
public long count(String field, BytesRef value) {
273+
public long count(FieldName field, BytesRef value) {
273274
return exists(field) ? -1 : 0;
274275
}
275276

276277
@Override
277-
public byte[] min(String field, DataType dataType) {
278+
public byte[] min(FieldName field, DataType dataType) {
278279
return null;
279280
}
280281

281282
@Override
282-
public byte[] max(String field, DataType dataType) {
283+
public byte[] max(FieldName field, DataType dataType) {
283284
return null;
284285
}
285286

286287
@Override
287-
public boolean isSingleValue(String field) {
288+
public boolean isSingleValue(FieldName field) {
288289
return false;
289290
}
290291

291292
@Override
292-
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
293+
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) {
293294
return false;
294295
}
295296
}
@@ -340,23 +341,23 @@ private boolean isConfigationSet(Config config, String field) {
340341
}
341342

342343
@Override
343-
public boolean exists(String field) {
344-
return isConfigationSet(Config.EXISTS, field);
344+
public boolean exists(FieldName field) {
345+
return isConfigationSet(Config.EXISTS, field.string());
345346
}
346347

347348
@Override
348-
public boolean isIndexed(String field) {
349-
return isConfigationSet(Config.INDEXED, field);
349+
public boolean isIndexed(FieldName field) {
350+
return isConfigationSet(Config.INDEXED, field.string());
350351
}
351352

352353
@Override
353-
public boolean hasDocValues(String field) {
354-
return isConfigationSet(Config.DOC_VALUES, field);
354+
public boolean hasDocValues(FieldName field) {
355+
return isConfigationSet(Config.DOC_VALUES, field.string());
355356
}
356357

357358
@Override
358-
public boolean hasExactSubfield(String field) {
359-
return isConfigationSet(Config.EXACT_SUBFIELD, field);
359+
public boolean hasExactSubfield(FieldName field) {
360+
return isConfigationSet(Config.EXACT_SUBFIELD, field.string());
360361
}
361362

362363
@Override
@@ -474,8 +475,8 @@ private static SearchStats fieldMatchingExistOrMissing(boolean exists, String...
474475
private final Set<String> fields = Set.of(names);
475476

476477
@Override
477-
public boolean exists(String field) {
478-
return fields.contains(field) == exists;
478+
public boolean exists(FieldName field) {
479+
return fields.contains(field.string()) == exists;
479480
}
480481
};
481482
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,19 @@ count:long | message:keyword
13251325
3 | Connected to 10.1.0.3
13261326
;
13271327

1328+
multiIndexStatsOfMultiTypedField
1329+
required_capability: union_types
1330+
required_capability: casting_operator
1331+
required_capability: union_types_numeric_widening
1332+
1333+
FROM apps, apps_short
1334+
| STATS s = sum(id::integer)
1335+
;
1336+
1337+
s:long
1338+
210
1339+
;
1340+
13281341
multiIndexMultiColumnTypesRename
13291342
required_capability: union_types
13301343
required_capability: index_metadata_field

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
14511451
indexToConversionExpressions.put(indexName, newConvertFunction);
14521452
}
14531453
MultiTypeEsField multiTypeEsField = new MultiTypeEsField(
1454-
fa.fieldName(),
1454+
fa.fieldName().string(),
14551455
convert.dataType(),
14561456
false,
14571457
indexToConversionExpressions

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,13 @@ public UnsupportedEsField field() {
123123
}
124124

125125
@Override
126-
public String fieldName() {
127-
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
128-
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
129-
return name();
126+
public FieldName fieldName() {
127+
if (lazyFieldName == null) {
128+
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
129+
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
130+
lazyFieldName = new FieldName(name());
131+
}
132+
return lazyFieldName;
130133
}
131134

132135
@Override

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferNonNullAggConstraint.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
/**
2727
* The vast majority of aggs ignore null entries - this rule adds a pushable filter, as it is cheap
28-
* to execute, to filter this entries out to begin with.
28+
* to execute, to filter these entries out to begin with.
2929
* STATS x = min(a), y = sum(b)
3030
* becomes
3131
* | WHERE a IS NOT NULL OR b IS NOT NULL
@@ -55,7 +55,7 @@ protected LogicalPlan rule(Aggregate aggregate, LocalLogicalOptimizerContext con
5555
Expression field = af.field();
5656
// ignore literals (e.g. COUNT(1))
5757
// make sure the field exists at the source and is indexed (not runtime)
58-
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.name())) {
58+
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.fieldName())) {
5959
nonNullAggFields.add(field);
6060
} else {
6161
// otherwise bail out since unless disjunction needs to cover _all_ fields, things get filtered out

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,32 +126,36 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, Stri
126126
};
127127

128128
/**
129-
* If we have access to SearchStats over a collection of shards, we can make more fine-grained decisions about what can be pushed down.
130-
* This should open up more opportunities for lucene pushdown.
129+
* If we have access to {@link SearchStats} over a collection of shards, we can make more fine-grained decisions about what can be
130+
* pushed down. This should open up more opportunities for lucene pushdown.
131131
*/
132132
static LucenePushdownPredicates from(SearchStats stats) {
133+
// TODO: use FieldAttribute#fieldName, otherwise this doesn't apply to field attributes used for union types.
134+
// C.f. https://github.com/elastic/elasticsearch/issues/128905
133135
return new LucenePushdownPredicates() {
134136
@Override
135137
public boolean hasExactSubfield(FieldAttribute attr) {
136-
return stats.hasExactSubfield(attr.name());
138+
return stats.hasExactSubfield(new FieldAttribute.FieldName(attr.name()));
137139
}
138140

139141
@Override
140142
public boolean isIndexedAndHasDocValues(FieldAttribute attr) {
141143
// We still consider the value of isAggregatable here, because some fields like ScriptFieldTypes are always aggregatable
142144
// But this could hide issues with fields that are not indexed but are aggregatable
143145
// This is the original behaviour for ES|QL, but is it correct?
144-
return attr.field().isAggregatable() || stats.isIndexed(attr.name()) && stats.hasDocValues(attr.name());
146+
return attr.field().isAggregatable()
147+
|| stats.isIndexed(new FieldAttribute.FieldName(attr.name()))
148+
&& stats.hasDocValues(new FieldAttribute.FieldName(attr.name()));
145149
}
146150

147151
@Override
148152
public boolean isIndexed(FieldAttribute attr) {
149-
return stats.isIndexed(attr.name());
153+
return stats.isIndexed(new FieldAttribute.FieldName(attr.name()));
150154
}
151155

152156
@Override
153157
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
154-
return stats.canUseEqualityOnSyntheticSourceDelegate(attr.field().getName(), value);
158+
return stats.canUseEqualityOnSyntheticSourceDelegate(attr.fieldName(), value);
155159
}
156160
};
157161
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ private Tuple<List<Attribute>, List<EsStatsQueryExec.Stat>> pushableStats(
9797
if (target instanceof FieldAttribute fa) {
9898
var fName = fa.fieldName();
9999
if (context.searchStats().isSingleValue(fName)) {
100-
fieldName = fName;
100+
fieldName = fName.string();
101101
query = QueryBuilders.existsQuery(fieldName);
102102
}
103103
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi
144144
MappedFieldType.FieldExtractPreference fieldExtractPreference = fieldExtractExec.fieldExtractPreference(attr);
145145
ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference);
146146
// Do not use the field attribute name, this can deviate from the field name for union types.
147-
String fieldName = attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name();
147+
String fieldName = attr instanceof FieldAttribute fa ? fa.fieldName().string() : attr.name();
148148
boolean isUnsupported = dataType == DataType.UNSUPPORTED;
149149
IntFunction<BlockLoader> loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes);
150150
fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader));
@@ -277,7 +277,7 @@ public final Operator.OperatorFactory ordinalGroupingOperatorFactory(
277277
boolean isUnsupported = attrSource.dataType() == DataType.UNSUPPORTED;
278278
var unionTypes = findUnionTypes(attrSource);
279279
// Do not use the field attribute name, this can deviate from the field name for union types.
280-
String fieldName = attrSource instanceof FieldAttribute fa ? fa.fieldName() : attrSource.name();
280+
String fieldName = attrSource instanceof FieldAttribute fa ? fa.fieldName().string() : attrSource.name();
281281
return new OrdinalsGroupingOperator.OrdinalsGroupingOperatorFactory(
282282
shardIdx -> getBlockLoaderFor(shardIdx, fieldName, isUnsupported, NONE, unionTypes),
283283
vsShardContexts,

0 commit comments

Comments
 (0)