Skip to content

Commit c94a329

Browse files
committed
feat(mcp): implement temporary device creation for Android, iOS, and Web tools; enhance action space retrieval
1 parent bcf7a1a commit c94a329

File tree

6 files changed

+85
-6
lines changed

6 files changed

+85
-6
lines changed

packages/android-mcp/src/android-tools.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ export class AndroidMidsceneTools extends BaseMidsceneTools {
3131
];
3232
}
3333

34+
// Builds a throwaway AndroidDevice so the base class can read its actionSpace()
// when no agent could be obtained.
protected createTemporaryDevice() {
35+
// Load @midscene/android lazily with require() at call time
36+
const { AndroidDevice } = require('@midscene/android');
37+
// 'temp-for-actionspace' is a placeholder device id; the options object is empty.
38+
// NOTE(review): relies on the AndroidDevice constructor not opening an ADB connection — confirm against @midscene/android
39+
return new AndroidDevice('temp-for-actionspace', {});
40+
}
41+
3442
protected async ensureAgent(deviceId?: string): Promise<AndroidAgent> {
3543
if (this.agent && deviceId) {
3644
// If a specific deviceId is requested and we have an agent,

packages/android/src/agent.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ export async function agentFromAdbDevice(
8888
if (!deviceId) {
8989
const devices = await getConnectedDevices();
9090

91+
if (devices.length === 0) {
92+
throw new Error(
93+
'No Android devices found. Please connect an Android device and ensure ADB is properly configured. Run `adb devices` to verify device connection.',
94+
);
95+
}
96+
9197
deviceId = devices[0].udid;
9298

9399
debugAgent(

packages/ios-mcp/src/ios-tools.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ export class IOSMidsceneTools extends BaseMidsceneTools {
2929
];
3030
}
3131

32+
// Builds a throwaway IOSDevice so the base class can read its actionSpace()
// when no agent could be obtained.
protected createTemporaryDevice() {
33+
// Load @midscene/ios lazily with require() at call time
34+
const { IOSDevice } = require('@midscene/ios');
35+
// Constructed with an empty options object only.
36+
// NOTE(review): relies on the IOSDevice constructor only setting up its WDA backend without connecting to WebDriverAgent — confirm against @midscene/ios
37+
return new IOSDevice({});
38+
}
39+
3240
protected async ensureAgent(): Promise<IOSAgent> {
3341
if (this.agent) {
3442
return this.agent;

packages/mcp/src/web-tools.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,30 @@ export class WebMidsceneTools extends BaseMidsceneTools {
3333
];
3434
}
3535

36+
// Builds a throwaway PuppeteerWebPage around a stub page object so the base
// class can read its actionSpace() when no agent could be obtained.
protected createTemporaryDevice() {
37+
// Load @midscene/web lazily with require() at call time
38+
const { PuppeteerWebPage } = require('@midscene/web');
39+
40+
// Stub page exposing only url(), mouse.{click,wheel,move} and keyboard.{type,press};
41+
// every handler is a no-op and url() always reports 'about:blank'.
// NOTE(review): assumes the PuppeteerWebPage constructor and actionSpace() touch no other page members — confirm
42+
const mockPage = {
43+
url: () => 'about:blank',
44+
mouse: {
45+
click: async () => {},
46+
wheel: async () => {},
47+
move: async () => {},
48+
},
49+
keyboard: {
50+
type: async () => {},
51+
press: async () => {},
52+
},
53+
};
54+
55+
// The stub is cast to `any`, so the compiler does not check it against the real page type.
56+
// No browser is launched here; the second argument is an empty options object.
57+
return new PuppeteerWebPage(mockPage as any, {});
58+
}
59+
3660
protected async ensureAgent(openNewTabWithUrl?: string): Promise<any> {
3761
// Re-init if URL provided
3862
if (this.agent && openNewTabWithUrl) {

packages/shared/src/mcp/base-tools.ts

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,20 @@ export abstract class BaseMidsceneTools implements IMidsceneTools {
3333
return [];
3434
}
3535

36+
/**
37+
* Optional hook: build a temporary device instance whose actionSpace() can be read
38+
* without going through ensureAgent(). initTools() calls it only after the agent path fails.
* The returned object must expose actionSpace(); destroy() is invoked afterwards if present.
* This base implementation returns undefined, so initTools()'s `tempDevice.actionSpace()`
* call throws and the surrounding catch falls back to getDefaultActionSpace().
* Platform tool classes override this to return a real device object.
39+
*/
40+
protected createTemporaryDevice?(): any {
41+
return undefined;
42+
}
43+
3644
/**
3745
* Initialize all tools by querying actionSpace
46+
* Uses three-layer fallback strategy:
47+
* 1. Try to get actionSpace from connected agent
48+
* 2. Create temporary device instance to read actionSpace
49+
* 3. Use hardcoded default actionSpace
3850
*/
3951
public async initTools(): Promise<void> {
4052
this.toolDefinitions = [];
@@ -44,16 +56,33 @@ export abstract class BaseMidsceneTools implements IMidsceneTools {
4456
const platformTools = this.preparePlatformTools();
4557
this.toolDefinitions.push(...platformTools);
4658

47-
// 2. Try to get agent and its action space
59+
// 2. Try to get agent and its action space (three-layer fallback)
4860
let actionSpace: any[];
4961
try {
62+
// Layer 1: Try to use connected agent
5063
const agent = await this.ensureAgent();
5164
actionSpace = await agent.getActionSpace();
52-
debug('Action space:', actionSpace.map((a: any) => a.name).join(', '));
65+
debug('Action space from connected agent:', actionSpace.map((a: any) => a.name).join(', '));
5366
} catch (error) {
54-
// If agent initialization fails, use default action space
55-
debug('Using default action space due to initialization failure');
56-
actionSpace = this.getDefaultActionSpace();
67+
debug('Failed to get action space from agent, trying temporary device');
68+
69+
try {
70+
// Layer 2: Create temporary device instance to read actionSpace
71+
if (this.createTemporaryDevice) {
72+
const tempDevice = this.createTemporaryDevice();
73+
actionSpace = tempDevice.actionSpace();
74+
debug('Action space from temporary device:', actionSpace.map((a: any) => a.name).join(', '));
75+
76+
// Destroy temporary instance using optional chaining
77+
await tempDevice.destroy?.();
78+
} else {
79+
throw new Error('createTemporaryDevice not implemented');
80+
}
81+
} catch (fallbackError) {
82+
// Layer 3: Use hardcoded default actionSpace
83+
debug('Using default action space due to all failures');
84+
actionSpace = this.getDefaultActionSpace();
85+
}
5786
}
5887

5988
// 3. Generate tools from action space (core innovation)

packages/shared/src/mcp/tool-generator.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,14 @@ export function generateToolsFromActionSpace(
2525
handler: async (args: any) => {
2626
const agent = await getAgent();
2727

28+
// Extract actual parameters from the 'param' wrapper
29+
// MCP wraps parameters in { param: {...} }, so we need to unwrap it
30+
const actionParams = args.param || args;
31+
2832
// Call the action through agent's action method
2933
await agent.aiAction(`Use the action "${action.name}"`, {
3034
planType: action.name,
31-
...args,
35+
...actionParams,
3236
});
3337

3438
// Return screenshot after action

0 commit comments

Comments
 (0)