Skip to content

Commit ee29108

Browse files
committed
Optimise clone operation for incremental full cluster snapshots (opensearch-project#16296)
* Optimise clone operation for incremental full cluster snapshots

  Signed-off-by: Ashish Singh <[email protected]>

* Add UTs

  Signed-off-by: Ashish Singh <[email protected]>

* Add CHANGELOG

  Signed-off-by: Ashish Singh <[email protected]>

---------

Signed-off-by: Ashish Singh <[email protected]>
1 parent 5947002 commit ee29108

File tree

3 files changed

+504
-58
lines changed

3 files changed

+504
-58
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5252
- Remove identity-related feature flagged code from the RestController ([#15430](https://github.com/opensearch-project/OpenSearch/pull/15430))
5353
- Remove Identity FeatureFlag ([#16024](https://github.com/opensearch-project/OpenSearch/pull/16024))
5454
- Ensure RestHandler.Wrapper delegates all implementations to the wrapped handler ([#16154](https://github.com/opensearch-project/OpenSearch/pull/16154))
55-
55+
- Optimise clone operation for incremental full cluster snapshots ([#16296](https://github.com/opensearch-project/OpenSearch/pull/16296))
5656

5757
### Deprecated
5858

server/src/main/java/org/opensearch/snapshots/SnapshotsService.java

Lines changed: 64 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,7 +1510,8 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
15101510

15111511
private final Set<RepositoryShardId> currentlyCloning = Collections.synchronizedSet(new HashSet<>());
15121512

1513-
private void runReadyClone(
1513+
// Made package-private so that the method can be tested in UTs
1514+
void runReadyClone(
15141515
Snapshot target,
15151516
SnapshotId sourceSnapshot,
15161517
ShardSnapshotStatus shardStatusBefore,
@@ -1534,69 +1535,75 @@ public void onFailure(Exception e) {
15341535
@Override
15351536
protected void doRun() {
15361537
final String localNodeId = clusterService.localNode().getId();
1537-
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
1538-
try {
1539-
final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(
1540-
repositoryData,
1538+
if (remoteStoreIndexShallowCopy == false) {
1539+
executeClone(localNodeId, false);
1540+
} else {
1541+
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
1542+
try {
1543+
final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(
1544+
repositoryData,
1545+
sourceSnapshot,
1546+
repoShardId.index()
1547+
);
1548+
final boolean cloneRemoteStoreIndexShardSnapshot = indexMetadata.getSettings()
1549+
.getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false);
1550+
executeClone(localNodeId, cloneRemoteStoreIndexShardSnapshot);
1551+
} catch (IOException e) {
1552+
logger.warn("Failed to get index-metadata from repository data for index [{}]", repoShardId.index().getName());
1553+
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
1554+
}
1555+
}, this::onFailure));
1556+
}
1557+
}
1558+
1559+
private void executeClone(String localNodeId, boolean cloneRemoteStoreIndexShardSnapshot) {
1560+
if (currentlyCloning.add(repoShardId)) {
1561+
if (cloneRemoteStoreIndexShardSnapshot) {
1562+
repository.cloneRemoteStoreIndexShardSnapshot(
15411563
sourceSnapshot,
1542-
repoShardId.index()
1564+
target.getSnapshotId(),
1565+
repoShardId,
1566+
shardStatusBefore.generation(),
1567+
remoteStoreLockManagerFactory,
1568+
getCloneCompletionListener(localNodeId)
15431569
);
1544-
final boolean cloneRemoteStoreIndexShardSnapshot = remoteStoreIndexShallowCopy
1545-
&& indexMetadata.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false);
1546-
final SnapshotId targetSnapshot = target.getSnapshotId();
1547-
final ActionListener<String> listener = ActionListener.wrap(
1548-
generation -> innerUpdateSnapshotState(
1549-
new ShardSnapshotUpdate(
1550-
target,
1551-
repoShardId,
1552-
new ShardSnapshotStatus(localNodeId, ShardState.SUCCESS, generation)
1553-
),
1554-
ActionListener.runBefore(
1555-
ActionListener.wrap(
1556-
v -> logger.trace(
1557-
"Marked [{}] as successfully cloned from [{}] to [{}]",
1558-
repoShardId,
1559-
sourceSnapshot,
1560-
targetSnapshot
1561-
),
1562-
e -> {
1563-
logger.warn("Cluster state update after successful shard clone [{}] failed", repoShardId);
1564-
failAllListenersOnMasterFailOver(e);
1565-
}
1566-
),
1567-
() -> currentlyCloning.remove(repoShardId)
1568-
)
1569-
),
1570-
e -> {
1571-
logger.warn("Exception [{}] while trying to clone shard [{}]", e, repoShardId);
1572-
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
1573-
}
1570+
} else {
1571+
repository.cloneShardSnapshot(
1572+
sourceSnapshot,
1573+
target.getSnapshotId(),
1574+
repoShardId,
1575+
shardStatusBefore.generation(),
1576+
getCloneCompletionListener(localNodeId)
15741577
);
1575-
if (currentlyCloning.add(repoShardId)) {
1576-
if (cloneRemoteStoreIndexShardSnapshot) {
1577-
repository.cloneRemoteStoreIndexShardSnapshot(
1578-
sourceSnapshot,
1579-
targetSnapshot,
1578+
}
1579+
}
1580+
}
1581+
1582+
private ActionListener<String> getCloneCompletionListener(String localNodeId) {
1583+
return ActionListener.wrap(
1584+
generation -> innerUpdateSnapshotState(
1585+
new ShardSnapshotUpdate(target, repoShardId, new ShardSnapshotStatus(localNodeId, ShardState.SUCCESS, generation)),
1586+
ActionListener.runBefore(
1587+
ActionListener.wrap(
1588+
v -> logger.trace(
1589+
"Marked [{}] as successfully cloned from [{}] to [{}]",
15801590
repoShardId,
1581-
shardStatusBefore.generation(),
1582-
remoteStoreLockManagerFactory,
1583-
listener
1584-
);
1585-
} else {
1586-
repository.cloneShardSnapshot(
15871591
sourceSnapshot,
1588-
targetSnapshot,
1589-
repoShardId,
1590-
shardStatusBefore.generation(),
1591-
listener
1592-
);
1593-
}
1594-
}
1595-
} catch (IOException e) {
1596-
logger.warn("Failed to get index-metadata from repository data for index [{}]", repoShardId.index().getName());
1592+
target.getSnapshotId()
1593+
),
1594+
e -> {
1595+
logger.warn("Cluster state update after successful shard clone [{}] failed", repoShardId);
1596+
failAllListenersOnMasterFailOver(e);
1597+
}
1598+
),
1599+
() -> currentlyCloning.remove(repoShardId)
1600+
)
1601+
),
1602+
e -> {
1603+
logger.warn("Exception [{}] while trying to clone shard [{}]", e, repoShardId);
15971604
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
15981605
}
1599-
}, this::onFailure));
1606+
);
16001607
}
16011608
});
16021609
}

0 commit comments

Comments
 (0)