Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
4 new Playwright spec files covering all 10 planned E2E scenarios:
- user-scenarios-core.spec.ts (14 tests): Onboarding, multi-turn dialogue, model switching — covers scenarios 1-3
- user-scenarios-automation.spec.ts (16 tests): Hands CRUD/trigger/approval, Pipeline workflow, automation triggers — covers scenarios 4, 6, 9
- user-scenarios-saas-memory.spec.ts (16 tests): Memory system, settings config, SaaS integration, butler panel — covers scenarios 5, 7, 8, 10
- user-scenarios-live.spec.ts (1 test): 100+ round real LLM conversation with context recall verification — uses live backend
271 lines
13 KiB
TypeScript
271 lines
13 KiB
TypeScript
/**
 * ZCLAW Live Multi-Turn Dialogue E2E Test
 *
 * Uses a REAL LLM API for a 100+ round conversation.
 * Requires:
 * - Dev server running at localhost:1420
 * - Backend connected with a valid API key
 * - Real LLM provider configured
 *
 * Run: npx playwright test user-scenarios-live.spec.ts --headed
 *
 * The test simulates a toy factory owner's daily conversations.
 */

import { test, expect } from '@playwright/test';
import type { Page } from '@playwright/test';
import { storeInspectors, STORE_NAMES } from '../fixtures/store-inspectors';
import { skipOnboarding, waitForAppReady, navigateToTab } from '../utils/user-actions';

/** Address of the dev server that the live test navigates to. */
const BASE_URL = 'http://localhost:1420';

// File-level test timeout: 600000 ms = 10 minutes, because every round waits on a real LLM reply.
test.setTimeout(600000);

/**
 * Clicks the chat "send" control.
 *
 * The button is looked up by its accessible name ('发送消息'); the first
 * `button.bg-orange-500` element is accepted as an alternative. The first
 * element matching either locator is clicked.
 *
 * @param page - Playwright page hosting the chat UI.
 */
async function clickSend(page: Page): Promise<void> {
  const byAccessibleName = page.getByRole('button', { name: '发送消息' });
  const byStyleClass = page.locator('button.bg-orange-500').first();
  await byAccessibleName.or(byStyleClass).first().click();
}

/**
 * Waits until the persisted chat store reports that streaming has finished.
 *
 * Inside the page, the `zclaw-chat-storage` localStorage entry is read and the
 * wait resolves once its `state.isStreaming` is strictly `false`. A missing
 * entry, or one that cannot be parsed/read, counts as "not streaming" so the
 * wait does not block on absent state.
 *
 * The wait is best-effort: a timeout (or any other failure of the wait) never
 * throws. The outcome is reported through the return value instead, so callers
 * that care can tell a finished stream from a wait that gave up; callers that
 * ignore the result behave as before.
 *
 * NOTE(review): this relies on `isStreaming` being part of the persisted
 * state — confirm against the chat store's persistence config.
 *
 * @param page - Playwright page hosting the chat UI.
 * @param timeout - Maximum time to wait, in milliseconds.
 * @returns `true` when the condition was observed, `false` when the wait failed.
 */
async function waitForStreamComplete(page: Page, timeout = 60000): Promise<boolean> {
  try {
    await page.waitForFunction(
      () => {
        const stored = localStorage.getItem('zclaw-chat-storage');
        if (!stored) return true;
        try {
          const state = JSON.parse(stored).state;
          return state.isStreaming === false;
        } catch {
          return true;
        }
      },
      // `waitForFunction(pageFunction, arg, options)`: the options object must be
      // the THIRD argument. Passed second, it becomes the page-function argument
      // and the timeout is never applied.
      undefined,
      { timeout }
    );
    return true;
  } catch {
    return false;
  }
}

/**
 * 100+ round conversation plan — simulating a toy factory owner's day.
 *
 * 103 user turns, sent to the chat in array order. `category` tags each turn;
 * the test reads it for the context-recall check and to label errors.
 */
const CONVERSATION_PLAN: ReadonlyArray<{ content: string; category: string }> = [
  // Phase 1: Greeting & introduction (rounds 1-10)
  { content: '你好,我是做玩具的,姓李', category: 'greeting' },
  { content: '我有一家小工厂在澄海', category: 'greeting' },
  { content: '做塑料玩具十几年了', category: 'greeting' },
  { content: '最近想了解一下AI能怎么帮我', category: 'greeting' },
  { content: '我们主要做出口,欧洲和北美', category: 'intro' },
  { content: '工人大概50个', category: 'intro' },
  { content: '年产值大概两三千万', category: 'intro' },
  { content: '你能不能帮我介绍一下你的能力?', category: 'intro' },
  { content: '听起来不错,那我们慢慢聊', category: 'greeting' },
  { content: '先帮我看看最近的ABS原料价格趋势', category: 'transition' },

  // Phase 2: Material price analysis (rounds 11-30)
  { content: 'ABS最近的走势怎么样?', category: 'material' },
  { content: '和国际油价有关系吗?', category: 'material' },
  { content: '苯乙烯的价格最近多少一吨?', category: 'material' },
  { content: '那现在14000一吨的ABS算便宜还是贵?', category: 'material' },
  { content: '我一般一个月用多少原料呢?大概50吨', category: 'material' },
  { content: '那算下来一个月原料成本大概70万?', category: 'material' },
  { content: '有没什么方法能降低原料成本?', category: 'material' },
  { content: '囤货的话风险大不大?', category: 'material' },
  { content: '我听说有些厂家用回料,你怎么看?', category: 'material' },
  { content: '出口欧洲的话,回料可以用吗?', category: 'material' },
  { content: 'EN71标准对材质有什么要求?', category: 'material' },
  { content: 'REACH呢?对塑料原料有限制吗?', category: 'material' },
  { content: '那食品级的ABS和非食品级差多少价格?', category: 'material' },
  { content: '澄海这边有好的ABS供应商推荐吗?', category: 'material' },
  { content: '中石化、台化和奇美的料哪个好?', category: 'material' },
  { content: '我之前一直用奇美的757,你觉得怎么样?', category: 'material' },
  { content: '有没有性价比更高的替代品?', category: 'material' },
  { content: '直接从厂家拿货和从经销商拿货差多少?', category: 'material' },
  { content: '付款方式一般是怎么样的?', category: 'material' },
  { content: '好的,原料这块我先了解了,谢谢你', category: 'transition' },

  // Phase 3: Supplier comparison (rounds 31-46)
  { content: '帮我对比一下几个主要供应商', category: 'supplier' },
  { content: '奇美、台化、镇海炼化各有什么优势?', category: 'supplier' },
  { content: '交期方面呢?', category: 'supplier' },
  { content: '售后服务哪个更好?', category: 'supplier' },
  { content: '我之前的供应商突然涨价了30%,合理吗?', category: 'supplier' },
  { content: '一般涨价多少算是正常范围?', category: 'supplier' },
  { content: '我应该怎么和供应商谈判?', category: 'supplier' },
  { content: '签长期合同有什么注意事项?', category: 'supplier' },
  { content: '保底价格和浮动价格哪种更好?', category: 'supplier' },
  { content: '如果我一次订100吨能拿什么折扣?', category: 'supplier' },
  { content: '物流费用怎么算?到澄海大概多少钱?', category: 'supplier' },
  { content: '期货和现货哪个划算?', category: 'supplier' },
  { content: '有没有供应商协会或者展会推荐?', category: 'supplier' },
  { content: '塑料交易网的信息准不准?', category: 'supplier' },
  { content: '好的,供应商这块我先做做功课', category: 'transition' },
  { content: '接下来我想聊聊产品设计', category: 'transition' },

  // Phase 4: Product design (rounds 47-63)
  { content: '今年流行什么类型的玩具?', category: 'design' },
  { content: '欧美市场喜欢什么风格?', category: 'design' },
  { content: '盲盒类的产品还有市场吗?', category: 'design' },
  { content: 'STEAM教育玩具前景怎么样?', category: 'design' },
  { content: '环保材质的玩具能卖贵一点吗?', category: 'design' },
  { content: '用PCR材料做玩具可行吗?', category: 'design' },
  { content: '设计版权怎么保护?', category: 'design' },
  { content: '我请一个自由设计师大概要多少钱?', category: 'design' },
  { content: '开模费用一般是多少?', category: 'design' },
  { content: '一个新产品从设计到量产大概要多久?', category: 'design' },
  { content: '小批量试产有什么好的方案?', category: 'design' },
  { content: '3D打印做原型靠谱吗?', category: 'design' },
  { content: '包装设计有什么要注意的?', category: 'design' },
  { content: '出口欧洲的包装有什么特殊要求?', category: 'design' },
  { content: 'CE认证好办吗?大概多少钱?', category: 'design' },
  { content: '认证周期多长?', category: 'design' },
  { content: '好的,产品设计这块很有收获', category: 'transition' },

  // Phase 5: Seasonal planning (rounds 64-83)
  { content: '马上要进入旺季了,怎么备货比较好?', category: 'planning' },
  { content: '圣诞节一般提前多久开始备货?', category: 'planning' },
  { content: '去年圣诞节的销售情况怎么样?', category: 'planning' },
  { content: '除了圣诞还有什么旺季?', category: 'planning' },
  { content: '万圣节的市场大不大?', category: 'planning' },
  { content: '夏天有什么好的品类?', category: 'planning' },
  { content: '库存管理有什么好的工具推荐?', category: 'planning' },
  { content: '安全库存怎么算?', category: 'planning' },
  { content: '我一般保持多少天的库存比较合适?', category: 'planning' },
  { content: '资金周转有什么好的建议?', category: 'planning' },
  { content: '银行贷款和供应商赊账哪个更好?', category: 'planning' },
  { content: '有什么补贴政策可以利用吗?', category: 'planning' },
  { content: '出口退税能退多少?', category: 'planning' },
  { content: '澄海政府对玩具有什么扶持政策?', category: 'planning' },
  { content: '参加广交会效果好还是香港玩具展好?', category: 'planning' },
  { content: '线上渠道有什么推荐?', category: 'planning' },
  { content: '亚马逊玩具类目竞争激烈吗?', category: 'planning' },
  { content: 'tiktok shop能卖玩具吗?', category: 'planning' },
  { content: '好的,备货和渠道我了解了', category: 'transition' },
  { content: '最后帮我总结一下今天聊的内容', category: 'summary' },

  // Phase 6: Mixed questions — context recall, general topics, closing (rounds 84-103)
  { content: '对了,你还记得我在哪做玩具吗?', category: 'recall' },
  { content: '我主要用什么材料?', category: 'recall' },
  { content: '出口哪些市场?', category: 'recall' },
  { content: '好的,记忆力不错', category: 'recall' },
  { content: '有没有什么自动化的工具能帮我管理工厂?', category: 'general' },
  { content: 'ERP系统有什么推荐?', category: 'general' },
  { content: '小工厂用Excel够用吗?', category: 'general' },
  { content: '工人管理有什么好的方法?', category: 'general' },
  { content: '计件工资和计时工资哪个好?', category: 'general' },
  { content: '怎么减少废品率?', category: 'general' },
  { content: '品质管控有什么标准流程?', category: 'general' },
  { content: '出货前要做哪些检测?', category: 'general' },
  { content: '客户投诉怎么处理比较好?', category: 'general' },
  { content: '退货率控制在多少以内算正常?', category: 'general' },
  { content: '好的,今天收获很大,谢谢你的建议', category: 'closing' },
  { content: '下次我想聊聊怎么用AI帮我做市场调研', category: 'closing' },
  { content: '你有什么功能能帮我自动化做一些事情吗?', category: 'closing' },
  { content: '比如每天帮我查ABS价格?', category: 'closing' },
  { content: '能帮我整理供应商信息吗?', category: 'closing' },
  { content: '太好了,下次见!', category: 'closing' },
];

test.describe('Live Multi-Turn Dialogue (Real LLM)', () => {
  // Mark this test as slow
  test.slow();

  /**
   * Sends every turn of CONVERSATION_PLAN through the real chat UI, one at a
   * time, then checks completion rate, context recall, message persistence,
   * recorded console errors and that the input is still usable.
   *
   * A failing round is recorded in `errors` and the loop continues, so a single
   * bad round does not abort the run.
   */
  test('LIVE-01: 100+ round conversation with real LLM', async ({ page }) => {
    /** The part of the chat store state that this test reads. */
    type ChatState = { messages: Array<{ content: string; role: string }> };

    // No mock gateway — use real backend
    await skipOnboarding(page);
    await page.goto(BASE_URL);
    await waitForAppReady(page);

    // Clear previous messages
    await storeInspectors.clearStore(page, STORE_NAMES.CHAT);
    await page.waitForTimeout(1000);

    let successfulRounds = 0;
    let contextRecallPassed = false;
    const errors: string[] = [];

    for (const [i, { content, category }] of CONVERSATION_PLAN.entries()) {
      const round = i + 1;

      try {
        // Find and fill chat input
        const chatInput = page.locator('textarea').first();
        await chatInput.waitFor({ state: 'visible', timeout: 10000 });
        await chatInput.fill(content);
        await clickSend(page);

        // Wait for streaming to complete
        await waitForStreamComplete(page, 120000);
        await page.waitForTimeout(1000); // Small buffer

        successfulRounds++;

        // Check context recall at specific points
        if (category === 'recall' && content.includes('我在哪做玩具')) {
          // The response should mention 澄海
          const state = await storeInspectors.getChatState<ChatState>(page);
          const lastAssistantMsg = [...(state?.messages ?? [])]
            .reverse()
            .find(m => m.role === 'assistant');
          if (lastAssistantMsg?.content?.includes('澄海')) {
            contextRecallPassed = true;
          }
        }

        // Log progress every 10 rounds
        if (round % 10 === 0) {
          console.log(`[Live Test] Completed ${round}/${CONVERSATION_PLAN.length} rounds`);
          await page.screenshot({
            path: `test-results/screenshots/live-round-${round}.png`,
          });
        }
      } catch (err) {
        errors.push(`Round ${round} (${category}): ${err}`);
        // Try to recover
        await page.waitForTimeout(2000);
      }
    }

    const finalState = await storeInspectors.getChatState<ChatState>(page);

    // Report — printed BEFORE the assertions so the per-round error details are
    // still available when one of the expectations below fails.
    console.log(`\n═══ Live Test Report ═══`);
    console.log(`Successful rounds: ${successfulRounds}/${CONVERSATION_PLAN.length}`);
    console.log(`Context recall passed: ${contextRecallPassed}`);
    console.log(`Total messages in store: ${finalState?.messages?.length ?? 0}`);
    console.log(`Errors: ${errors.length}`);
    if (errors.length > 0) {
      console.log('Error details:', errors);
    }
    console.log(`════════════════════════\n`);

    // ═══ Assertions ═══

    // 1. Should complete most rounds
    expect(successfulRounds).toBeGreaterThanOrEqual(
      Math.floor(CONVERSATION_PLAN.length * 0.7)
    );

    // 2. Context recall should work (only enforced when most rounds succeeded)
    if (successfulRounds > 80) {
      expect(contextRecallPassed).toBe(true);
    }

    // 3. Messages should be persisted
    const userMsgs = finalState?.messages?.filter(m => m.role === 'user') ?? [];
    expect(userMsgs.length).toBeGreaterThanOrEqual(
      Math.floor(CONVERSATION_PLAN.length * 0.5)
    );

    // 4. No console errors that crash the app
    // NOTE(review): nothing in this file populates `window.__consoleErrors`;
    // confirm a fixture or the app installs it, otherwise this check always passes.
    const consoleErrors = await page.evaluate(() => {
      return (window as Window & { __consoleErrors?: unknown[] }).__consoleErrors ?? [];
    });
    expect(consoleErrors.length).toBeLessThan(5);

    // 5. App should still be responsive
    await expect(page.locator('textarea').first()).toBeVisible({ timeout: 5000 });
  });
});