Skip to content

Commit 90310c3

Browse files
andrrosssarthakaggarwal97backslasht
authored
Introducing ZStd compression codec plugin (opensearch-project#9658) (opensearch-project#9832)
* introducing zstd compression codec plugin * Moving zstd compression codec as a plugin * introducing zstd compression codec plugin * Adding checks to EngineConfig and fixing tests * incorporating review comments * fixing tests * introducing zstd compression codec plugin * addressing review comments * nit fixes * implementing codec aliases * addressing review comments * review comments * moving codec aliases to custom codec * adding zstd default codec for backward compatibility * renaming to deprecated codec * incorporating review comments * nit fixes --------- (cherry picked from commit 76f1b52) Signed-off-by: Sarthak Aggarwal <[email protected]> Signed-off-by: Prabhakar Sithanandam <[email protected]> Signed-off-by: Andrew Ross <[email protected]> Co-authored-by: Sarthak Aggarwal <[email protected]> Co-authored-by: Prabhakar Sithanandam <[email protected]>
1 parent 655d223 commit 90310c3

File tree

30 files changed

+724
-147
lines changed

30 files changed

+724
-147
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
117117
- [Remote Store] Add support for Remote Translog Store stats in `_remotestore/stats/` API ([#9263](https://github.com/opensearch-project/OpenSearch/pull/9263))
118118
- Removing the vec file extension from INDEX_STORE_HYBRID_NIO_EXTENSIONS, to ensure no performance degradation for vector search via Lucene Engine. ([#9528](https://github.com/opensearch-project/OpenSearch/pull/9528))
119119
- Cleanup Unreferenced file on segment merge failure ([#9503](https://github.com/opensearch-project/OpenSearch/pull/9503))
120+
- Move ZStd to a plugin ([#9658](https://github.com/opensearch-project/OpenSearch/pull/9658))
120121

121122
### Deprecated
122123

modules/reindex/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ dependencies {
6969
testImplementation project(':modules:transport-netty4')
7070
// for parent/child testing
7171
testImplementation project(':modules:parent-join')
72+
testImplementation project(':plugins:custom-codecs')
7273
}
7374

7475
restResources {
@@ -95,4 +96,5 @@ forbiddenPatterns {
9596
tasks.named("bundlePlugin").configure {
9697
dependsOn("copyParentJoinMetadata")
9798
dependsOn("copyTransportNetty4Metadata")
99+
dependsOn("copyCustomCodecsMetadata")
98100
}

modules/reindex/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecReindexIT.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,19 @@
1515
import org.opensearch.action.support.ActiveShardCount;
1616
import org.opensearch.cluster.metadata.IndexMetadata;
1717
import org.opensearch.common.settings.Settings;
18+
import org.opensearch.index.codec.customcodecs.CustomCodecPlugin;
1819
import org.opensearch.index.engine.Segment;
1920
import org.opensearch.index.reindex.BulkByScrollResponse;
2021
import org.opensearch.index.reindex.ReindexAction;
22+
import org.opensearch.index.reindex.ReindexPlugin;
2123
import org.opensearch.index.reindex.ReindexRequestBuilder;
2224
import org.opensearch.index.reindex.ReindexTestCase;
25+
import org.opensearch.plugins.Plugin;
2326

2427
import java.util.ArrayList;
2528
import java.util.Arrays;
29+
import java.util.Collection;
30+
import java.util.List;
2631
import java.util.Map;
2732
import java.util.UUID;
2833
import java.util.concurrent.ExecutionException;
@@ -40,6 +45,11 @@
4045

4146
public class MultiCodecReindexIT extends ReindexTestCase {
4247

48+
@Override
49+
protected Collection<Class<? extends Plugin>> nodePlugins() {
50+
return List.of(CustomCodecPlugin.class, ReindexPlugin.class);
51+
}
52+
4353
public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException {
4454
internalCluster().ensureAtLeastNumDataNodes(1);
4555
Map<String, String> codecMap = Map.of(

plugins/custom-codecs/build.gradle

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*
8+
* Modifications Copyright OpenSearch Contributors. See
9+
* GitHub history for details.
10+
*/
11+
12+
apply plugin: 'opensearch.opensearchplugin'
13+
apply plugin: 'opensearch.internal-cluster-test'
14+
15+
opensearchplugin {
16+
name 'custom-codecs'
17+
description 'A plugin that implements custom compression codecs.'
18+
classname 'org.opensearch.index.codec.customcodecs.CustomCodecPlugin'
19+
licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt')
20+
noticeFile rootProject.file('NOTICE.txt')
21+
}
22+
23+
dependencies {
24+
api "com.github.luben:zstd-jni:1.5.5-5"
25+
}
26+
27+
testingConventions.enabled = false;

server/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java renamed to plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/CodecCompressionLevelIT.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,24 @@
1212
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
1313
import org.opensearch.cluster.metadata.IndexMetadata;
1414
import org.opensearch.common.settings.Settings;
15+
import org.opensearch.index.codec.customcodecs.CustomCodecPlugin;
16+
import org.opensearch.plugins.Plugin;
1517
import org.opensearch.test.OpenSearchIntegTestCase;
1618

19+
import java.util.Collection;
20+
import java.util.Collections;
1721
import java.util.concurrent.ExecutionException;
1822

23+
import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_CODEC;
24+
import static org.opensearch.index.codec.customcodecs.CustomCodecService.ZSTD_NO_DICT_CODEC;
1925
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
2026

2127
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST)
2228
public class CodecCompressionLevelIT extends OpenSearchIntegTestCase {
29+
@Override
30+
protected Collection<Class<? extends Plugin>> nodePlugins() {
31+
return Collections.singletonList(CustomCodecPlugin.class);
32+
}
2333

2434
public void testLuceneCodecsCreateIndexWithCompressionLevel() {
2535

@@ -62,7 +72,7 @@ public void testZStandardCodecsCreateIndexWithCompressionLevel() {
6272
Settings.builder()
6373
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
6474
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
65-
.put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC))
75+
.put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC))
6676
.put("index.codec.compression_level", randomIntBetween(1, 6))
6777
.build()
6878
);
@@ -81,7 +91,7 @@ public void testZStandardToLuceneCodecsWithCompressionLevel() throws ExecutionEx
8191
Settings.builder()
8292
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
8393
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
84-
.put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC))
94+
.put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC))
8595
.put("index.codec.compression_level", randomIntBetween(1, 6))
8696
.build()
8797
);
@@ -164,7 +174,7 @@ public void testLuceneToZStandardCodecsWithCompressionLevel() throws ExecutionEx
164174
.updateSettings(
165175
new UpdateSettingsRequest(index).settings(
166176
Settings.builder()
167-
.put("index.codec", randomFrom(CodecService.ZSTD_CODEC, CodecService.ZSTD_NO_DICT_CODEC))
177+
.put("index.codec", randomFrom(ZSTD_CODEC, ZSTD_NO_DICT_CODEC))
168178
.put("index.codec.compression_level", randomIntBetween(1, 6))
169179
)
170180
)

server/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java renamed to plugins/custom-codecs/src/internalClusterTest/java/org/opensearch/index/codec/MultiCodecMergeIT.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@
1515
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
1616
import org.opensearch.cluster.metadata.IndexMetadata;
1717
import org.opensearch.common.settings.Settings;
18+
import org.opensearch.index.codec.customcodecs.CustomCodecPlugin;
1819
import org.opensearch.index.engine.Segment;
20+
import org.opensearch.plugins.Plugin;
1921
import org.opensearch.test.OpenSearchIntegTestCase;
2022

2123
import java.util.ArrayList;
2224
import java.util.Arrays;
25+
import java.util.Collection;
26+
import java.util.Collections;
2327
import java.util.List;
2428
import java.util.Map;
2529
import java.util.UUID;
@@ -40,6 +44,11 @@
4044
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST)
4145
public class MultiCodecMergeIT extends OpenSearchIntegTestCase {
4246

47+
@Override
48+
protected Collection<Class<? extends Plugin>> nodePlugins() {
49+
return Collections.singletonList(CustomCodecPlugin.class);
50+
}
51+
4352
public void testForceMergeMultipleCodecs() throws ExecutionException, InterruptedException {
4453

4554
Map<String, String> codecMap = Map.of(
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.opensearch.index.IndexSettings;
12+
import org.opensearch.index.codec.CodecServiceFactory;
13+
import org.opensearch.index.engine.EngineConfig;
14+
import org.opensearch.plugins.EnginePlugin;
15+
import org.opensearch.plugins.Plugin;
16+
17+
import java.util.Optional;
18+
19+
/**
20+
* A plugin that implements custom codecs. Supports these codecs:
21+
* <ul>
22+
* <li>ZSTD
23+
* <li>ZSTDNODICT
24+
* </ul>
25+
*
26+
* @opensearch.internal
27+
*/
28+
public final class CustomCodecPlugin extends Plugin implements EnginePlugin {
29+
30+
/**
31+
* Creates a new instance
32+
*/
33+
public CustomCodecPlugin() {}
34+
35+
/**
36+
* @param indexSettings the index settings from which the configured codec name is read
37+
* @return the custom codec service factory if the index codec is zstd or zstd_no_dict, otherwise an empty Optional
38+
*/
39+
@Override
40+
public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSettings indexSettings) {
41+
String codecName = indexSettings.getValue(EngineConfig.INDEX_CODEC_SETTING);
42+
if (codecName.equals(CustomCodecService.ZSTD_NO_DICT_CODEC) || codecName.equals(CustomCodecService.ZSTD_CODEC)) {
43+
return Optional.of(new CustomCodecServiceFactory());
44+
}
45+
return Optional.empty();
46+
}
47+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.logging.log4j.Logger;
12+
import org.apache.lucene.codecs.Codec;
13+
import org.opensearch.common.collect.MapBuilder;
14+
import org.opensearch.index.IndexSettings;
15+
import org.opensearch.index.codec.CodecService;
16+
import org.opensearch.index.mapper.MapperService;
17+
18+
import java.util.Arrays;
19+
import java.util.Map;
20+
import java.util.stream.Stream;
21+
22+
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
23+
24+
/**
25+
* CustomCodecService provides ZSTD and ZSTD_NO_DICT compression codecs.
26+
*/
27+
public class CustomCodecService extends CodecService {
28+
private final Map<String, Codec> codecs;
29+
/**
30+
* ZStandard codec
31+
*/
32+
public static final String ZSTD_CODEC = "zstd";
33+
/**
34+
* ZStandard without dictionary codec
35+
*/
36+
public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict";
37+
38+
/**
39+
* Creates a new CustomCodecService.
40+
*
41+
* @param mapperService The mapper service.
42+
* @param indexSettings The index settings.
43+
* @param logger The logger.
44+
*/
45+
public CustomCodecService(MapperService mapperService, IndexSettings indexSettings, Logger logger) {
46+
super(mapperService, indexSettings, logger);
47+
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
48+
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
49+
if (mapperService == null) {
50+
codecs.put(ZSTD_CODEC, new ZstdCodec(compressionLevel));
51+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(compressionLevel));
52+
} else {
53+
codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel));
54+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel));
55+
}
56+
this.codecs = codecs.immutableMap();
57+
}
58+
59+
@Override
60+
public Codec codec(String name) {
61+
Codec codec = codecs.get(name);
62+
if (codec == null) {
63+
return super.codec(name);
64+
}
65+
return codec;
66+
}
67+
68+
@Override
69+
public String[] availableCodecs() {
70+
return Stream.concat(Arrays.stream(super.availableCodecs()), codecs.keySet().stream()).toArray(String[]::new);
71+
}
72+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.opensearch.index.codec.CodecService;
12+
import org.opensearch.index.codec.CodecServiceConfig;
13+
import org.opensearch.index.codec.CodecServiceFactory;
14+
15+
/**
16+
* A factory for creating new {@link CodecService} instances.
17+
*/
18+
public class CustomCodecServiceFactory implements CodecServiceFactory {
19+
20+
/** Creates a new instance. */
21+
public CustomCodecServiceFactory() {}
22+
23+
@Override
24+
public CodecService createCodecService(CodecServiceConfig config) {
25+
return new CustomCodecService(config.getMapperService(), config.getIndexSettings(), config.getLogger());
26+
}
27+
}

server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java renamed to plugins/custom-codecs/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
1616
import org.opensearch.index.mapper.MapperService;
1717

18+
import java.util.Collections;
19+
import java.util.Set;
20+
1821
/**
1922
*
2023
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
@@ -23,12 +26,48 @@
2326
* @opensearch.internal
2427
*/
2528
public abstract class Lucene95CustomCodec extends FilterCodec {
29+
30+
/** Default compression level used for compression */
2631
public static final int DEFAULT_COMPRESSION_LEVEL = 3;
2732

2833
/** Each mode represents a compression algorithm. */
2934
public enum Mode {
30-
ZSTD,
31-
ZSTD_NO_DICT
35+
/**
36+
* ZStandard mode with dictionary
37+
*/
38+
ZSTD("ZSTD", Set.of("zstd")),
39+
/**
40+
* ZStandard mode without dictionary
41+
*/
42+
ZSTD_NO_DICT("ZSTDNODICT", Set.of("zstd_no_dict")),
43+
/**
44+
* Deprecated ZStandard mode, added for backward compatibility to support indices created in 2.9.0 where
45+
* both ZSTD and ZSTD_NO_DICT used Lucene95CustomCodec underneath. This should not be used to
46+
* create new indices.
47+
*/
48+
ZSTD_DEPRECATED("Lucene95CustomCodec", Collections.emptySet());
49+
50+
private final String codec;
51+
private final Set<String> aliases;
52+
53+
Mode(String codec, Set<String> aliases) {
54+
this.codec = codec;
55+
this.aliases = aliases;
56+
}
57+
58+
/**
59+
* Returns the name under which the codec is registered with Lucene
60+
*/
61+
public String getCodec() {
62+
return codec;
63+
}
64+
65+
/**
66+
* Returns the aliases of the Codec
67+
*/
68+
public Set<String> getAliases() {
69+
return aliases;
70+
}
3271
}
3372

3473
private final StoredFieldsFormat storedFieldsFormat;
@@ -51,12 +90,22 @@ public Lucene95CustomCodec(Mode mode) {
5190
* @param compressionLevel The compression level.
5291
*/
5392
public Lucene95CustomCodec(Mode mode, int compressionLevel) {
54-
super("Lucene95CustomCodec", new Lucene95Codec());
93+
super(mode.getCodec(), new Lucene95Codec());
5594
this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel);
5695
}
5796

97+
/**
98+
* Creates a new compression codec with the given compression level. We use
99+
* lowercase letters when registering the codec so that we remain consistent with
100+
* the other compression codecs: default, lucene_default, and best_compression.
101+
*
102+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
103+
* @param compressionLevel The compression level.
104+
* @param mapperService The mapper service.
105+
* @param logger The logger.
106+
*/
58107
public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
59-
super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger));
108+
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger));
60109
this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel);
61110
}
62111

0 commit comments

Comments
 (0)