Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b4e0dc6
feat(remote-comms): Add handshake message types
sirtimid Jan 29, 2026
38f0ff1
feat(remote-comms): Generate incarnation ID in kernel
sirtimid Jan 29, 2026
d469219
feat(remote-comms): Add incarnation tracking to PeerStateManager
sirtimid Jan 29, 2026
e14e7d2
feat(remote-comms): Implement handshake protocol in transport layer
sirtimid Jan 29, 2026
c210b86
test(e2e): Add incarnation detection e2e tests
sirtimid Jan 29, 2026
c0fbeb1
fix(kernel): Use getRandomValues() for wider browser compatibility
sirtimid Jan 29, 2026
7d54b3e
Revert "fix(kernel): Use getRandomValues() for wider browser compatib…
sirtimid Jan 29, 2026
951d52d
refactor(kernel): Simplify incarnation ID architecture
sirtimid Jan 29, 2026
459246c
refactor(transport): simplify code and fix build
sirtimid Jan 29, 2026
75f5f17
fix(handshake): validate params.incarnationId in type guard
sirtimid Jan 29, 2026
9cec2a6
fix(handshake): clean up timeout timer in readWithTimeout
sirtimid Jan 29, 2026
613f15e
fix(transport): use safe holder pattern for handleConnectionLoss
sirtimid Jan 29, 2026
6113234
fix(e2e): remove redundant delay and clean up fresh database file
sirtimid Jan 29, 2026
2fe6f05
test(transport): implement previously disabled todo tests
sirtimid Jan 29, 2026
25ee7e5
cleanup
sirtimid Jan 29, 2026
262aa93
fix(handshake): clean up abort event listener in readWithTimeout
sirtimid Jan 29, 2026
fc8e005
refactor(kernel): remove unused incarnationId field
sirtimid Jan 29, 2026
09b214b
refactor(handshake): remove unused getRemoteIncarnation from deps
sirtimid Jan 29, 2026
606f03f
fix(browser-runtime): add missing incarnationId to PlatformServicesCl…
sirtimid Jan 29, 2026
d29f8c7
fix(handshake): use optional chaining for parsed.method in error mess…
sirtimid Jan 29, 2026
48fcf57
merge: resolve conflicts with main
sirtimid Jan 29, 2026
f105d58
Merge branch 'main' into sirtimid/issue-689-incarnation-detection
sirtimid Jan 29, 2026
516cd65
refactor: remove unused code and fix type import
sirtimid Jan 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ export class PlatformServicesClient implements PlatformServices {
* @param options.maxQueue - Maximum number of messages to queue per peer while reconnecting (default: 200).
* @param remoteMessageHandler - A handler function to receive remote messages.
* @param onRemoteGiveUp - Optional callback to be called when we give up on a remote.
* @param incarnationId - Unique identifier for this kernel instance.
* @returns A promise that resolves once network access has been established
* or rejects if there is some problem doing so.
*/
Expand All @@ -202,6 +203,7 @@ export class PlatformServicesClient implements PlatformServices {
options: RemoteCommsOptions,
remoteMessageHandler: (from: string, message: string) => Promise<string>,
onRemoteGiveUp?: (peerId: string) => void,
incarnationId?: string,
): Promise<void> {
this.#remoteMessageHandler = remoteMessageHandler;
this.#remoteGiveUpHandler = onRemoteGiveUp;
Expand All @@ -210,6 +212,7 @@ export class PlatformServicesClient implements PlatformServices {
...Object.fromEntries(
Object.entries(options).filter(([, value]) => value !== undefined),
),
...(incarnationId !== undefined && { incarnationId }),
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ describe('PlatformServicesServer', () => {
{ relays },
expect.any(Function),
expect.any(Function),
undefined,
);
});

Expand All @@ -428,6 +429,7 @@ describe('PlatformServicesServer', () => {
options,
expect.any(Function),
expect.any(Function),
undefined,
);
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,14 +270,13 @@ export class PlatformServicesServer {
* connections from other kernels.
* @param options.maxRetryAttempts - Maximum number of reconnection attempts. 0 = infinite (default).
* @param options.maxQueue - Maximum number of messages to queue per peer while reconnecting (default: 200).
* @param _onRemoteGiveUp - Unused parameter (kept for interface compatibility).
* Remote give-up notifications are sent via RPC instead.
* @param incarnationId - This kernel's incarnation ID for handshake protocol.
* @returns A promise that resolves when network access has been initialized.
*/
async #initializeRemoteComms(
keySeed: string,
options: RemoteCommsOptions,
_onRemoteGiveUp?: (peerId: string) => void,
incarnationId?: string,
): Promise<null> {
if (this.#sendRemoteMessageFunc) {
throw Error('remote comms already initialized');
Expand All @@ -293,6 +292,7 @@ export class PlatformServicesServer {
options,
this.#handleRemoteMessage.bind(this),
this.#handleRemoteGiveUp.bind(this),
incarnationId,
);
this.#sendRemoteMessageFunc = sendRemoteMessage;
this.#stopRemoteCommsFunc = stop;
Expand Down
67 changes: 67 additions & 0 deletions packages/nodejs/test/e2e/remote-comms.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Kernel, kunser, makeKernelStore } from '@metamask/ocap-kernel';
import type { KRef } from '@metamask/ocap-kernel';
import { startRelay } from '@ocap/cli/relay';
import { delay } from '@ocap/repo-tools/test-utils';
import { unlink } from 'node:fs/promises';
import { describe, it, expect, beforeEach, afterEach } from 'vitest';

import { makeTestKernel, runTestVats } from '../helpers/kernel.ts';
Expand Down Expand Up @@ -865,6 +866,72 @@ describe.sequential('Remote Communications E2E', () => {
);
});

describe('Incarnation Detection', () => {
it(
'detects incarnation change when peer restarts with fresh state',
async () => {
// Initialize with low retry attempts to trigger give-up on incarnation change
await kernel1.initRemoteComms({
relays: testRelays,
maxRetryAttempts: 2,
});
await kernel2.initRemoteComms({ relays: testRelays });

const aliceConfig = makeRemoteVatConfig('Alice');
const bobConfig = makeRemoteVatConfig('Bob');

await launchVatAndGetURL(kernel1, aliceConfig);
const bobURL = await launchVatAndGetURL(kernel2, bobConfig);
const aliceRef = getVatRootRef(kernel1, kernelStore1, 'Alice');

// Establish connection and exchange handshakes
await sendRemoteMessage(kernel1, aliceRef, bobURL, 'hello', ['Alice']);

// Stop kernel2
await kernel2.stop();

// Simulate state loss by deleting kernel2's database and creating fresh
kernelDatabase2.close();
const freshDb = await makeSQLKernelDatabase({
dbFilename: 'rc-e2e-test-kernel2-fresh.db',
});

try {
// Create a completely new kernel (new incarnation ID, no previous state)
// eslint-disable-next-line require-atomic-updates
kernel2 = await makeTestKernel(freshDb, true);
await kernel2.initRemoteComms({ relays: testRelays });

// Launch Bob again (fresh vat, no previous state)
await launchVatAndGetURL(kernel2, bobConfig);

// Send a message - when the new kernel connects, it will have a different
// incarnation ID than before. The handshake will detect this change
// and trigger promise rejection for pending work.
// The await will naturally wait for the promise to settle - either
// succeeding (unexpected) or failing due to incarnation change detection.
const result = await kernel1.queueMessage(
aliceRef,
'sendRemoteMessage',
[bobURL, 'hello', ['Alice']],
);
const response = kunser(result);

// The message should fail because incarnation changed
// Either due to promise rejection or remote state being lost
expect(response).toBeInstanceOf(Error);
} finally {
freshDb.close();
// Clean up the fresh database file
await unlink('rc-e2e-test-kernel2-fresh.db').catch(() => {
// Ignore errors if file doesn't exist
});
}
},
NETWORK_TIMEOUT * 3,
);
});

describe('Promise Rejection on Remote Give-Up', () => {
it(
'rejects promises when remote connection is lost after max retries',
Expand Down
30 changes: 18 additions & 12 deletions packages/ocap-kernel/src/Kernel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,11 @@ export class Kernel {
}

if (options.resetStorage) {
this.#resetKernelState();
// If mnemonic is provided with resetStorage, also clear identity
// to allow recovery with the new mnemonic
if (options.mnemonic) {
this.#kernelStore.kv.delete('keySeed');
this.#kernelStore.kv.delete('peerId');
this.#kernelStore.kv.delete('ocapURLKey');
}
this.#resetKernelState({ resetIdentity: Boolean(options.mnemonic) });
}

this.#kernelQueue = new KernelQueue(
this.#kernelStore,
async (vatId, reason) => this.#vatManager.terminateVat(vatId, reason),
Expand Down Expand Up @@ -516,12 +512,22 @@ export class Kernel {

/**
* Reset the kernel state.
*/
#resetKernelState(): void {
this.#kernelStore.reset({
// XXX special case hack so that network address survives restart when testing
except: ['keySeed', 'peerId', 'ocapURLKey'],
});
*
* @param options - Options for the reset.
* @param options.resetIdentity - If true, also clears identity keys (keySeed, peerId, ocapURLKey, incarnationId).
*/
#resetKernelState({
resetIdentity = false,
}: { resetIdentity?: boolean } = {}): void {
if (resetIdentity) {
// Full reset including identity - used when recovering from mnemonic
this.#kernelStore.reset();
} else {
// Preserve identity keys so network address survives restart
this.#kernelStore.reset({
except: ['keySeed', 'peerId', 'ocapURLKey', 'incarnationId'],
});
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ describe('RemoteManager', () => {
logger,
undefined,
expect.any(Function),
kernelStore.getOrCreateIncarnationId(),
);
});

Expand All @@ -102,10 +103,12 @@ describe('RemoteManager', () => {
relays: ['relay1', 'relay2'],
maxRetryAttempts: 5,
maxQueue: 100,
mnemonic: undefined,
},
logger,
undefined,
expect.any(Function),
kernelStore.getOrCreateIncarnationId(),
);
});

Expand Down Expand Up @@ -133,6 +136,7 @@ describe('RemoteManager', () => {
logger,
keySeed,
expect.any(Function),
kernelStore.getOrCreateIncarnationId(),
);
});

Expand Down
9 changes: 9 additions & 0 deletions packages/ocap-kernel/src/remotes/kernel/RemoteManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ export class RemoteManager {
/** Optional mnemonic for seed derivation */
readonly #mnemonic: string | undefined;

/**
* Unique identifier for this kernel instance.
* Used to detect when a remote peer has lost its state and reconnected.
*/
readonly #incarnationId: string;

/** Remote communications interface */
#remoteComms: RemoteComms | undefined;

Expand Down Expand Up @@ -81,6 +87,8 @@ export class RemoteManager {
this.#logger = logger;
this.#keySeed = keySeed;
this.#mnemonic = mnemonic;
// Get incarnation ID from store - it's persisted so it survives restarts
this.#incarnationId = kernelStore.getOrCreateIncarnationId();
}

/**
Expand Down Expand Up @@ -153,6 +161,7 @@ export class RemoteManager {
this.#logger,
this.#keySeed,
this.#handleRemoteGiveUp.bind(this),
this.#incarnationId,
);

// Restore all remotes that were previously established
Expand Down
24 changes: 24 additions & 0 deletions packages/ocap-kernel/src/remotes/kernel/remote-comms.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ describe('remote-comms', () => {
}),
mockRemoteMessageHandler,
undefined, // onRemoteGiveUp
undefined, // incarnationId
);
});

Expand All @@ -165,6 +166,7 @@ describe('remote-comms', () => {
}),
mockRemoteMessageHandler,
undefined,
undefined, // incarnationId
);
});

Expand All @@ -184,6 +186,28 @@ describe('remote-comms', () => {
expect.any(Object),
mockRemoteMessageHandler,
onRemoteGiveUp,
undefined, // incarnationId
);
});

it('passes incarnationId to platformServices', async () => {
const incarnationId = 'test-incarnation-id';
await initRemoteComms(
mockKernelStore,
mockPlatformServices,
mockRemoteMessageHandler,
{},
undefined, // logger
undefined, // keySeed
undefined, // onRemoteGiveUp
incarnationId,
);
expect(mockPlatformServices.initializeRemoteComms).toHaveBeenCalledWith(
expect.any(String),
expect.any(Object),
mockRemoteMessageHandler,
undefined,
incarnationId,
);
});

Expand Down
3 changes: 3 additions & 0 deletions packages/ocap-kernel/src/remotes/kernel/remote-comms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ export function getKnownRelays(kv: KVStore): string[] {
* @param logger - The logger to use.
* @param keySeed - Optional seed for libp2p key generation.
* @param onRemoteGiveUp - Optional callback to be called when we give up on a remote.
* @param incarnationId - Unique identifier for this kernel instance.
*
* @returns the initialized remote comms object.
*/
Expand All @@ -115,6 +116,7 @@ export async function initRemoteComms(
logger?: Logger,
keySeed?: string,
onRemoteGiveUp?: OnRemoteGiveUp,
incarnationId?: string,
): Promise<RemoteComms> {
let peerId: string;
let ocapURLKey: Uint8Array;
Expand Down Expand Up @@ -167,6 +169,7 @@ export async function initRemoteComms(
{ ...options, relays: knownRelays },
remoteMessageHandler,
onRemoteGiveUp,
incarnationId,
);

/**
Expand Down
Loading
Loading