Skip to content

Commit 86cdd22

Browse files
committed
feat(warm-pool): add ghr:warm-pool-member tag for visual identification
Tag instances with ghr:warm-pool-member=true when they are stopped into the warm pool (scale-down), and remove the tag when they are started from the warm pool (scale-up). This makes it immediately visible in the EC2 console which stopped instances are currently warm pool members.

- scale-down: tag after stopRunner in both the removeRunner and stopOrTerminateOrphan paths
- scale-up: untag in findAndStartWarmRunners, alongside adding the ghr:started-from-warm-pool tag
- lambda-warm-pool.json: add the ec2:DeleteTags permission required for untag
1 parent ae1d27c commit 86cdd22

5 files changed

Lines changed: 19 additions & 7 deletions

File tree

lambdas/functions/control-plane/src/scale-runners/find-and-start-warm-runners.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, it, expect, beforeEach, vi } from 'vitest';
2-
import { startRunner, tag } from './../aws/runners';
2+
import { startRunner, tag, untag } from './../aws/runners';
33
import {
44
getWarmPoolConfig,
55
getPoolStrategy,
@@ -46,6 +46,7 @@ vi.mock('./job-retry', () => ({
4646

4747
const mockStartRunner = vi.mocked(startRunner);
4848
const mockTag = vi.mocked(tag);
49+
const mockUntag = vi.mocked(untag);
4950
const mockGetWarmPoolConfig = vi.mocked(getWarmPoolConfig);
5051
const mockGetPoolStrategy = vi.mocked(getPoolStrategy);
5152
const mockListWarmInstances = vi.mocked(listWarmInstancesByOwner);
@@ -144,6 +145,7 @@ describe('findAndStartWarmRunners', () => {
144145

145146
expect(result).toEqual(['i-warm-1']);
146147
expect(mockTag).toHaveBeenCalledWith('i-warm-1', [{ Key: 'ghr:started-from-warm-pool', Value: 'true' }]);
148+
expect(mockUntag).toHaveBeenCalledWith('i-warm-1', [{ Key: 'ghr:warm-pool-member' }]);
147149
});
148150

149151
it('should succeed even if tag fails (best-effort)', async () => {

lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,9 @@ describe('Scale down runners', () => {
719719
await scaleDown();
720720

721721
expect(mockStopRunner).toHaveBeenCalledWith(runners[0].instanceId);
722+
expect(mockTagRunners).toHaveBeenCalledWith(runners[0].instanceId, [
723+
{ Key: 'ghr:warm-pool-member', Value: 'true' },
724+
]);
722725
expect(mockAddToWarmPool).toHaveBeenCalledWith({
723726
instanceId: runners[0].instanceId,
724727
runnerOwner: runners[0].owner,

lambdas/functions/control-plane/src/scale-runners/scale-down.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promi
174174
if (warmCount < warmPoolConfig.maxWarmInstances) {
175175
try {
176176
await stopRunner(ec2runner.instanceId);
177+
await tag(ec2runner.instanceId, [{ Key: 'ghr:warm-pool-member', Value: 'true' }]);
177178
await addToWarmPool({
178179
instanceId: ec2runner.instanceId,
179180
runnerOwner: ec2runner.owner,
@@ -350,6 +351,7 @@ async function stopOrTerminateOrphan(
350351
if (warmCount < warmPoolConfig.maxWarmInstances) {
351352
try {
352353
await stopRunner(runner.instanceId);
354+
await tag(runner.instanceId, [{ Key: 'ghr:warm-pool-member', Value: 'true' }]);
353355
await addToWarmPool({
354356
instanceId: runner.instanceId,
355357
runnerOwner: owner,

lambdas/functions/control-plane/src/scale-runners/scale-up.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { getParameter, putParameter } from '@aws-github-runner/aws-ssm-util';
44
import yn from 'yn';
55

66
import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth';
7-
import { createRunner, listEC2Runners, startRunner, tag, terminateRunner } from './../aws/runners';
7+
import { createRunner, listEC2Runners, startRunner, tag, untag, terminateRunner } from './../aws/runners';
88
import { RunnerInputParameters } from './../aws/runners.d';
99
import { metricGitHubAppRateLimit } from '../github/rate-limit';
1010
import { publishRetryMessage } from './job-retry';
@@ -282,9 +282,12 @@ export async function findAndStartWarmRunners(
282282
emitWarmPoolMetric('WarmPoolInstanceStarted', 1, { Owner: runnerOwner });
283283
logger.info(`Started warm instance '${entry.instanceId}' for owner '${runnerOwner}'`);
284284

285-
// Observability tag per ADR: marks this instance was started from warm pool (permanent, best-effort)
286-
await tag(entry.instanceId, [{ Key: 'ghr:started-from-warm-pool', Value: 'true' }]).catch((e) => {
287-
logger.warn(`Failed to tag '${entry.instanceId}' as started-from-warm-pool, continuing`, { error: e });
285+
// Observability tags (best-effort): mark started from warm pool + remove membership tag
286+
await Promise.all([
287+
tag(entry.instanceId, [{ Key: 'ghr:started-from-warm-pool', Value: 'true' }]),
288+
untag(entry.instanceId, [{ Key: 'ghr:warm-pool-member' }]),
289+
]).catch((e) => {
290+
logger.warn(`Failed to update tags on '${entry.instanceId}', continuing`, { error: e });
288291
});
289292
} catch (e) {
290293
logger.warn(`Failed to start warm instance '${entry.instanceId}', skipping`, { error: e as Error });

modules/runners/policies/lambda-warm-pool.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
"Action": [
2020
"ec2:StopInstances",
2121
"ec2:StartInstances",
22-
"ec2:CreateTags"
22+
"ec2:CreateTags",
23+
"ec2:DeleteTags"
2324
],
2425
"Resource": "*",
2526
"Condition": {
@@ -33,7 +34,8 @@
3334
"Action": [
3435
"ec2:StopInstances",
3536
"ec2:StartInstances",
36-
"ec2:CreateTags"
37+
"ec2:CreateTags",
38+
"ec2:DeleteTags"
3739
],
3840
"Resource": "*",
3941
"Condition": {

0 commit comments

Comments
 (0)