Files
zclaw_openfang/desktop/tests/e2e/specs/user-scenarios-live.spec.ts
iven 6d2bedcfd7
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
test(desktop): Phase 4 E2E scenario tests — 47 tests for 10 user scenarios
4 new Playwright spec files covering all 10 planned E2E scenarios:

- user-scenarios-core.spec.ts (14 tests): Onboarding, multi-turn dialogue,
  model switching — covers scenarios 1-3
- user-scenarios-automation.spec.ts (16 tests): Hands CRUD/trigger/approval,
  Pipeline workflow, automation triggers — covers scenarios 4, 6, 9
- user-scenarios-saas-memory.spec.ts (16 tests): Memory system, settings
  config, SaaS integration, butler panel — covers scenarios 5, 7, 8, 10
- user-scenarios-live.spec.ts (1 test): 100+ round real LLM conversation
  with context recall verification — uses live backend
2026-04-07 17:44:31 +08:00

271 lines
13 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* ZCLAW Live Multi-Turn Dialogue E2E Test
*
* Uses REAL LLM API for 100+ round conversation.
* Requires:
* - Dev server running at localhost:1420
* - Backend connected with valid API key
* - Real LLM provider configured
*
* Run: npx playwright test user-scenarios-live.spec.ts --headed
*
* The test simulates a toy factory owner's daily conversations.
*/
import { test, expect } from '@playwright/test';
import { storeInspectors, STORE_NAMES } from '../fixtures/store-inspectors';
import { skipOnboarding, waitForAppReady, navigateToTab } from '../utils/user-actions';
/** Address of the locally running dev server that the live test drives. */
const BASE_URL = 'http://localhost:1420';

/**
 * Base per-test timeout for this file: 10 minutes.
 * Note: the describe block in this file also calls test.slow(), which
 * Playwright documents as tripling a test's timeout.
 */
const LIVE_TEST_TIMEOUT_MS = 10 * 60 * 1000;
test.setTimeout(LIVE_TEST_TIMEOUT_MS);
/**
 * Helper: press the chat "send" control.
 *
 * Targets the button whose accessible name is '发送消息'; the first
 * `button.bg-orange-500` on the page is accepted as an alternative match.
 * Whichever of the two resolves first in the combined locator is clicked.
 *
 * @param page Playwright page hosting the chat UI.
 */
async function clickSend(page: import('@playwright/test').Page) {
  const labelledButton = page.getByRole('button', { name: '发送消息' });
  const orangeFallback = page.locator('button.bg-orange-500').first();
  const target = labelledButton.or(orangeFallback).first();
  await target.click();
}
/**
 * Helper: wait until the chat store reports that streaming has finished.
 *
 * Polls (inside the browser) the `zclaw-chat-storage` localStorage entry and
 * resolves once its `state.isStreaming` is exactly `false`. A missing entry
 * or one that cannot be parsed/read is treated as "not streaming".
 *
 * This is a best-effort wait: if the condition is not met within `timeout`
 * the rejection is deliberately swallowed and the function still resolves,
 * so callers cannot tell a completed stream from a timed-out wait.
 *
 * @param page    Playwright page hosting the chat UI.
 * @param timeout Maximum time to wait, in milliseconds (default 60000).
 */
async function waitForStreamComplete(page: import('@playwright/test').Page, timeout = 60000) {
  // Serialized and evaluated in the page, so it must not close over Node-side values.
  const isStreamFinished = (): boolean => {
    const stored = localStorage.getItem('zclaw-chat-storage');
    if (!stored) return true;
    try {
      const state = JSON.parse(stored).state;
      return state.isStreaming === false;
    } catch {
      // Unparseable payload or missing `state`: nothing to wait for.
      return true;
    }
  };

  try {
    // Playwright's signature is (pageFunction, arg, options). The options object
    // must be the THIRD argument; passed second it is treated as `arg` and the
    // timeout is silently ignored.
    await page.waitForFunction(isStreamFinished, undefined, { timeout });
  } catch {
    // Best-effort: a timeout here must not abort the calling round.
  }
}
/** Topic tag attached to each scripted user message. */
type ConversationCategory =
  | 'greeting'
  | 'intro'
  | 'transition'
  | 'material'
  | 'supplier'
  | 'design'
  | 'planning'
  | 'summary'
  | 'recall'
  | 'general'
  | 'closing';

/** One scripted user turn of the live conversation. */
interface ConversationStep {
  /** Text typed into the chat input for this round. */
  readonly content: string;
  /** Topic tag; 'recall' marks the context-recall probes. */
  readonly category: ConversationCategory;
}

/**
 * 103-round conversation plan — simulating a toy factory owner's day.
 *
 * Entries are sent in order, one per round. Early rounds establish facts
 * (location 澄海, ABS material, export markets) that the 'recall' rounds in
 * the final phase ask about again.
 */
const CONVERSATION_PLAN: readonly ConversationStep[] = [
  // Phase 1: Greeting & introduction (rounds 1-10)
  { content: '你好,我是做玩具的,姓李', category: 'greeting' },
  { content: '我有一家小工厂在澄海', category: 'greeting' },
  { content: '做塑料玩具十几年了', category: 'greeting' },
  { content: '最近想了解一下AI能怎么帮我', category: 'greeting' },
  { content: '我们主要做出口,欧洲和北美', category: 'intro' },
  { content: '工人大概50个', category: 'intro' },
  { content: '年产值大概两三千万', category: 'intro' },
  { content: '你能不能帮我介绍一下你的能力?', category: 'intro' },
  { content: '听起来不错,那我们慢慢聊', category: 'greeting' },
  { content: '先帮我看看最近的ABS原料价格趋势', category: 'transition' },
  // Phase 2: Material price analysis (rounds 11-30)
  { content: 'ABS最近的走势怎么样', category: 'material' },
  { content: '和国际油价有关系吗?', category: 'material' },
  { content: '苯乙烯的价格最近多少一吨?', category: 'material' },
  { content: '那现在14000一吨的ABS算便宜还是贵', category: 'material' },
  { content: '我一般一个月用多少原料呢大概50吨', category: 'material' },
  { content: '那算下来一个月原料成本大概70万', category: 'material' },
  { content: '有没什么方法能降低原料成本?', category: 'material' },
  { content: '囤货的话风险大不大?', category: 'material' },
  { content: '我听说有些厂家用回料,你怎么看?', category: 'material' },
  { content: '出口欧洲的话,回料可以用吗?', category: 'material' },
  { content: 'EN71标准对材质有什么要求', category: 'material' },
  { content: 'REACH呢对塑料原料有限制吗', category: 'material' },
  { content: '那食品级的ABS和非食品级差多少价格', category: 'material' },
  { content: '澄海这边有好的ABS供应商推荐吗', category: 'material' },
  { content: '中石化、台化和奇美的料哪个好?', category: 'material' },
  { content: '我之前一直用奇美的757你觉得怎么样', category: 'material' },
  { content: '有没有性价比更高的替代品?', category: 'material' },
  { content: '直接从厂家拿货和从经销商拿货差多少?', category: 'material' },
  { content: '付款方式一般是怎么样的?', category: 'material' },
  { content: '好的,原料这块我先了解了,谢谢你', category: 'transition' },
  // Phase 3: Supplier comparison (rounds 31-46)
  { content: '帮我对比一下几个主要供应商', category: 'supplier' },
  { content: '奇美、台化、镇海炼化各有什么优势?', category: 'supplier' },
  { content: '交期方面呢?', category: 'supplier' },
  { content: '售后服务哪个更好?', category: 'supplier' },
  { content: '我之前的供应商突然涨价了30%,合理吗?', category: 'supplier' },
  { content: '一般涨价多少算是正常范围?', category: 'supplier' },
  { content: '我应该怎么和供应商谈判?', category: 'supplier' },
  { content: '签长期合同有什么注意事项?', category: 'supplier' },
  { content: '保底价格和浮动价格哪种更好?', category: 'supplier' },
  { content: '如果我一次订100吨能拿什么折扣', category: 'supplier' },
  { content: '物流费用怎么算?到澄海大概多少钱?', category: 'supplier' },
  { content: '期货和现货哪个划算?', category: 'supplier' },
  { content: '有没有供应商协会或者展会推荐?', category: 'supplier' },
  { content: '塑料交易网的信息准不准?', category: 'supplier' },
  { content: '好的,供应商这块我先做做功课', category: 'transition' },
  { content: '接下来我想聊聊产品设计', category: 'transition' },
  // Phase 4: Product design (rounds 47-63)
  { content: '今年流行什么类型的玩具?', category: 'design' },
  { content: '欧美市场喜欢什么风格?', category: 'design' },
  { content: '盲盒类的产品还有市场吗?', category: 'design' },
  { content: 'STEAM教育玩具前景怎么样', category: 'design' },
  { content: '环保材质的玩具能卖贵一点吗?', category: 'design' },
  { content: '用PCR材料做玩具可行吗', category: 'design' },
  { content: '设计版权怎么保护?', category: 'design' },
  { content: '我请一个自由设计师大概要多少钱?', category: 'design' },
  { content: '开模费用一般是多少?', category: 'design' },
  { content: '一个新产品从设计到量产大概要多久?', category: 'design' },
  { content: '小批量试产有什么好的方案?', category: 'design' },
  { content: '3D打印做原型靠谱吗', category: 'design' },
  { content: '包装设计有什么要注意的?', category: 'design' },
  { content: '出口欧洲的包装有什么特殊要求?', category: 'design' },
  { content: 'CE认证好办吗大概多少钱', category: 'design' },
  { content: '认证周期多长?', category: 'design' },
  { content: '好的,产品设计这块很有收获', category: 'transition' },
  // Phase 5: Seasonal planning & sales channels (rounds 64-83)
  { content: '马上要进入旺季了,怎么备货比较好?', category: 'planning' },
  { content: '圣诞节一般提前多久开始备货?', category: 'planning' },
  { content: '去年圣诞节的销售情况怎么样?', category: 'planning' },
  { content: '除了圣诞还有什么旺季?', category: 'planning' },
  { content: '万圣节的市场大不大?', category: 'planning' },
  { content: '夏天有什么好的品类?', category: 'planning' },
  { content: '库存管理有什么好的工具推荐?', category: 'planning' },
  { content: '安全库存怎么算?', category: 'planning' },
  { content: '我一般保持多少天的库存比较合适?', category: 'planning' },
  { content: '资金周转有什么好的建议?', category: 'planning' },
  { content: '银行贷款和供应商赊账哪个更好?', category: 'planning' },
  { content: '有什么补贴政策可以利用吗?', category: 'planning' },
  { content: '出口退税能退多少?', category: 'planning' },
  { content: '澄海政府对玩具有什么扶持政策?', category: 'planning' },
  { content: '参加广交会效果好还是香港玩具展好?', category: 'planning' },
  { content: '线上渠道有什么推荐?', category: 'planning' },
  { content: '亚马逊玩具类目竞争激烈吗?', category: 'planning' },
  { content: 'tiktok shop能卖玩具吗', category: 'planning' },
  { content: '好的,备货和渠道我了解了', category: 'transition' },
  { content: '最后帮我总结一下今天聊的内容', category: 'summary' },
  // Phase 6: Context recall, mixed questions & closing (rounds 84-103)
  { content: '对了,你还记得我在哪做玩具吗?', category: 'recall' },
  { content: '我主要用什么材料?', category: 'recall' },
  { content: '出口哪些市场?', category: 'recall' },
  { content: '好的,记忆力不错', category: 'recall' },
  { content: '有没有什么自动化的工具能帮我管理工厂?', category: 'general' },
  { content: 'ERP系统有什么推荐', category: 'general' },
  { content: '小工厂用Excel够用吗', category: 'general' },
  { content: '工人管理有什么好的方法?', category: 'general' },
  { content: '计件工资和计时工资哪个好?', category: 'general' },
  { content: '怎么减少废品率?', category: 'general' },
  { content: '品质管控有什么标准流程?', category: 'general' },
  { content: '出货前要做哪些检测?', category: 'general' },
  { content: '客户投诉怎么处理比较好?', category: 'general' },
  { content: '退货率控制在多少以内算正常?', category: 'general' },
  { content: '好的,今天收获很大,谢谢你的建议', category: 'closing' },
  { content: '下次我想聊聊怎么用AI帮我做市场调研', category: 'closing' },
  { content: '你有什么功能能帮我自动化做一些事情吗?', category: 'closing' },
  { content: '比如每天帮我查ABS价格', category: 'closing' },
  { content: '能帮我整理供应商信息吗?', category: 'closing' },
  { content: '太好了,下次见!', category: 'closing' },
];
/**
 * Live suite: sends every entry of CONVERSATION_PLAN through the chat UI
 * against the real backend (no mock gateway), then checks completion rate,
 * context recall, message persistence and that the UI is still usable.
 */
test.describe('Live Multi-Turn Dialogue (Real LLM)', () => {
  // Mark this test as slow
  test.slow();

  test('LIVE-01: 100+ round conversation with real LLM', async ({ page }) => {
    /** Subset of the chat store shape this test reads. */
    type ChatSnapshot = { messages: Array<{ content: string; role: string }> };

    // No mock gateway — use real backend
    await skipOnboarding(page);
    await page.goto(BASE_URL);
    await waitForAppReady(page);

    // Clear previous messages
    await storeInspectors.clearStore(page, STORE_NAMES.CHAT);
    await page.waitForTimeout(1000);

    const totalRounds = CONVERSATION_PLAN.length;
    let successfulRounds = 0;
    let contextRecallPassed = false;
    // Stays undefined if an assertion fails before the store is read.
    let storedMessageCount: number | undefined;
    const errors: string[] = [];

    for (const [index, { content, category }] of CONVERSATION_PLAN.entries()) {
      const round = index + 1;
      try {
        // Find and fill chat input
        const chatInput = page.locator('textarea').first();
        await chatInput.waitFor({ state: 'visible', timeout: 10000 });
        await chatInput.fill(content);
        await clickSend(page);

        // Wait for streaming to complete
        await waitForStreamComplete(page, 120000);
        await page.waitForTimeout(1000); // Small buffer
        successfulRounds++;

        // Check context recall at specific points
        if (category === 'recall' && content.includes('我在哪做玩具')) {
          // The response should mention 澄海
          const state = await storeInspectors.getChatState<ChatSnapshot>(page);
          const lastAssistantMsg = [...(state?.messages ?? [])]
            .reverse()
            .find((m) => m.role === 'assistant');
          if (lastAssistantMsg?.content?.includes('澄海')) {
            contextRecallPassed = true;
          }
        }

        // Log progress every 10 rounds
        if (round % 10 === 0) {
          console.log(`[Live Test] Completed ${round}/${totalRounds} rounds`);
          await page.screenshot({
            path: `test-results/screenshots/live-round-${round}.png`,
          });
        }
      } catch (err) {
        // A failed round is recorded and the conversation continues.
        errors.push(`Round ${round} (${category}): ${String(err)}`);
        // Try to recover
        await page.waitForTimeout(2000);
      }
    }

    try {
      // ═══ Assertions ═══
      // 1. Should complete most rounds
      expect(
        successfulRounds,
        `only ${successfulRounds}/${totalRounds} rounds completed`
      ).toBeGreaterThanOrEqual(Math.floor(totalRounds * 0.7));

      // 2. Context recall should work
      if (successfulRounds > 80) {
        expect(contextRecallPassed, 'assistant reply to the recall probe should mention 澄海').toBe(
          true
        );
      }

      // 3. Messages should be persisted
      const finalState = await storeInspectors.getChatState<ChatSnapshot>(page);
      storedMessageCount = finalState?.messages?.length ?? 0;
      const userMsgs = finalState?.messages?.filter((m) => m.role === 'user') ?? [];
      expect(userMsgs.length, 'persisted user messages').toBeGreaterThanOrEqual(
        Math.floor(totalRounds * 0.5)
      );

      // 4. No console errors that crash the app
      // NOTE(review): nothing in this file populates window.__consoleErrors; unless a
      // helper (e.g. skipOnboarding) installs it, this check is vacuous — confirm.
      const consoleErrors = await page.evaluate(() => {
        return (window as any).__consoleErrors ?? [];
      });
      expect(consoleErrors.length, 'collected console errors').toBeLessThan(5);

      // 5. App should still be responsive
      await expect(page.locator('textarea').first()).toBeVisible({ timeout: 5000 });
    } finally {
      // Report — emitted from `finally` so the per-round errors are visible
      // precisely when an assertion above fails.
      console.log(`\n═══ Live Test Report ═══`);
      console.log(`Successful rounds: ${successfulRounds}/${totalRounds}`);
      console.log(`Context recall passed: ${contextRecallPassed}`);
      console.log(`Total messages in store: ${storedMessageCount ?? 'not read'}`);
      console.log(`Errors: ${errors.length}`);
      if (errors.length > 0) {
        console.log('Error details:', errors);
      }
      console.log(`════════════════════════\n`);
    }
  });
});