/**
 * ZCLAW Live Multi-Turn Dialogue E2E Test
 *
 * Uses REAL LLM API for 100+ round conversation.
 * Requires:
 * - Dev server running at localhost:1420
 * - Backend connected with valid API key
 * - Real LLM provider configured
 *
 * Run: npx playwright test user-scenarios-live.spec.ts --headed
 *
 * The test simulates a toy factory owner's daily conversations.
 */
import { test, expect } from '@playwright/test';
import { storeInspectors, STORE_NAMES } from '../fixtures/store-inspectors';
import { skipOnboarding, waitForAppReady, navigateToTab } from '../utils/user-actions';

const BASE_URL = 'http://localhost:1420';

test.setTimeout(600000); // 10 minutes for live test

/** One scripted user turn: the text to send and a coarse topic label. */
interface ConversationTurn {
  content: string;
  category: string;
}

/** Shape of the chat store fields this spec reads. */
interface ChatStateSnapshot {
  messages: Array<{ content: string; role: string }>;
}

/**
 * Helper: click the send button.
 *
 * Looks the button up by its accessible name first and falls back to the
 * first orange button on the page.
 *
 * @param page - Playwright page hosting the chat UI.
 */
async function clickSend(page: import('@playwright/test').Page): Promise<void> {
  const sendButton = page
    .getByRole('button', { name: '发送消息' })
    .or(page.locator('button.bg-orange-500').first());
  await sendButton.first().click();
}

/**
 * Helper: wait for streaming to complete.
 *
 * Polls the `zclaw-chat-storage` localStorage entry until its
 * `state.isStreaming` is `false`. A missing or unparsable entry is treated as
 * "not streaming". The wait is best-effort: it never throws, so a timeout only
 * produces a warning and the caller carries on with the next round.
 *
 * @param page - Playwright page hosting the chat UI.
 * @param timeout - Maximum time to wait, in milliseconds.
 */
async function waitForStreamComplete(
  page: import('@playwright/test').Page,
  timeout = 60000
): Promise<void> {
  try {
    await page.waitForFunction(
      () => {
        const stored = localStorage.getItem('zclaw-chat-storage');
        if (!stored) return true;
        try {
          const state = JSON.parse(stored).state;
          return state.isStreaming === false;
        } catch {
          return true;
        }
      },
      { timeout }
    );
  } catch (err: unknown) {
    // Deliberately non-fatal, but make the stall visible instead of silent.
    console.warn(
      `[Live Test] Stream completion not observed (timeout ${timeout}ms): ${String(err)}`
    );
  }
}

/**
 * 100+ round conversation plan — simulating a toy factory owner's day
 * (103 turns in total).
 */
const CONVERSATION_PLAN: ConversationTurn[] = [
  // Phase 1: Greeting & introduction (rounds 1-10)
  { content: '你好,我是做玩具的,姓李', category: 'greeting' },
  { content: '我有一家小工厂在澄海', category: 'greeting' },
  { content: '做塑料玩具十几年了', category: 'greeting' },
  { content: '最近想了解一下AI能怎么帮我', category: 'greeting' },
  { content: '我们主要做出口,欧洲和北美', category: 'intro' },
  { content: '工人大概50个', category: 'intro' },
  { content: '年产值大概两三千万', category: 'intro' },
  { content: '你能不能帮我介绍一下你的能力?', category: 'intro' },
  { content: '听起来不错,那我们慢慢聊', category: 'greeting' },
  { content: '先帮我看看最近的ABS原料价格趋势', category: 'transition' },

  // Phase 2: Material price analysis (rounds 11-30)
  { content: 'ABS最近的走势怎么样?', category: 'material' },
  { content: '和国际油价有关系吗?', category: 'material' },
  { content: '苯乙烯的价格最近多少一吨?', category: 'material' },
  { content: '那现在14000一吨的ABS算便宜还是贵?', category: 'material' },
  { content: '我一般一个月用多少原料呢?大概50吨', category: 'material' },
  { content: '那算下来一个月原料成本大概70万?', category: 'material' },
  { content: '有没什么方法能降低原料成本?', category: 'material' },
  { content: '囤货的话风险大不大?', category: 'material' },
  { content: '我听说有些厂家用回料,你怎么看?', category: 'material' },
  { content: '出口欧洲的话,回料可以用吗?', category: 'material' },
  { content: 'EN71标准对材质有什么要求?', category: 'material' },
  { content: 'REACH呢?对塑料原料有限制吗?', category: 'material' },
  { content: '那食品级的ABS和非食品级差多少价格?', category: 'material' },
  { content: '澄海这边有好的ABS供应商推荐吗?', category: 'material' },
  { content: '中石化、台化和奇美的料哪个好?', category: 'material' },
  { content: '我之前一直用奇美的757,你觉得怎么样?', category: 'material' },
  { content: '有没有性价比更高的替代品?', category: 'material' },
  { content: '直接从厂家拿货和从经销商拿货差多少?', category: 'material' },
  { content: '付款方式一般是怎么样的?', category: 'material' },
  { content: '好的,原料这块我先了解了,谢谢你', category: 'transition' },

  // Phase 3: Supplier comparison (rounds 31-46)
  { content: '帮我对比一下几个主要供应商', category: 'supplier' },
  { content: '奇美、台化、镇海炼化各有什么优势?', category: 'supplier' },
  { content: '交期方面呢?', category: 'supplier' },
  { content: '售后服务哪个更好?', category: 'supplier' },
  { content: '我之前的供应商突然涨价了30%,合理吗?', category: 'supplier' },
  { content: '一般涨价多少算是正常范围?', category: 'supplier' },
  { content: '我应该怎么和供应商谈判?', category: 'supplier' },
  { content: '签长期合同有什么注意事项?', category: 'supplier' },
  { content: '保底价格和浮动价格哪种更好?', category: 'supplier' },
  { content: '如果我一次订100吨能拿什么折扣?', category: 'supplier' },
  { content: '物流费用怎么算?到澄海大概多少钱?', category: 'supplier' },
  { content: '期货和现货哪个划算?', category: 'supplier' },
  { content: '有没有供应商协会或者展会推荐?', category: 'supplier' },
  { content: '塑料交易网的信息准不准?', category: 'supplier' },
  { content: '好的,供应商这块我先做做功课', category: 'transition' },
  { content: '接下来我想聊聊产品设计', category: 'transition' },

  // Phase 4: Product design (rounds 47-63)
  { content: '今年流行什么类型的玩具?', category: 'design' },
  { content: '欧美市场喜欢什么风格?', category: 'design' },
  { content: '盲盒类的产品还有市场吗?', category: 'design' },
  { content: 'STEAM教育玩具前景怎么样?', category: 'design' },
  { content: '环保材质的玩具能卖贵一点吗?', category: 'design' },
  { content: '用PCR材料做玩具可行吗?', category: 'design' },
  { content: '设计版权怎么保护?', category: 'design' },
  { content: '我请一个自由设计师大概要多少钱?', category: 'design' },
  { content: '开模费用一般是多少?', category: 'design' },
  { content: '一个新产品从设计到量产大概要多久?', category: 'design' },
  { content: '小批量试产有什么好的方案?', category: 'design' },
  { content: '3D打印做原型靠谱吗?', category: 'design' },
  { content: '包装设计有什么要注意的?', category: 'design' },
  { content: '出口欧洲的包装有什么特殊要求?', category: 'design' },
  { content: 'CE认证好办吗?大概多少钱?', category: 'design' },
  { content: '认证周期多长?', category: 'design' },
  { content: '好的,产品设计这块很有收获', category: 'transition' },

  // Phase 5: Seasonal planning (rounds 64-83)
  { content: '马上要进入旺季了,怎么备货比较好?', category: 'planning' },
  { content: '圣诞节一般提前多久开始备货?', category: 'planning' },
  { content: '去年圣诞节的销售情况怎么样?', category: 'planning' },
  { content: '除了圣诞还有什么旺季?', category: 'planning' },
  { content: '万圣节的市场大不大?', category: 'planning' },
  { content: '夏天有什么好的品类?', category: 'planning' },
  { content: '库存管理有什么好的工具推荐?', category: 'planning' },
  { content: '安全库存怎么算?', category: 'planning' },
  { content: '我一般保持多少天的库存比较合适?', category: 'planning' },
  { content: '资金周转有什么好的建议?', category: 'planning' },
  { content: '银行贷款和供应商赊账哪个更好?', category: 'planning' },
  { content: '有什么补贴政策可以利用吗?', category: 'planning' },
  { content: '出口退税能退多少?', category: 'planning' },
  { content: '澄海政府对玩具有什么扶持政策?', category: 'planning' },
  { content: '参加广交会效果好还是香港玩具展好?', category: 'planning' },
  { content: '线上渠道有什么推荐?', category: 'planning' },
  { content: '亚马逊玩具类目竞争激烈吗?', category: 'planning' },
  { content: 'tiktok shop能卖玩具吗?', category: 'planning' },
  { content: '好的,备货和渠道我了解了', category: 'transition' },
  { content: '最后帮我总结一下今天聊的内容', category: 'summary' },

  // Phase 6: Mixed questions (rounds 84-103)
  { content: '对了,你还记得我在哪做玩具吗?', category: 'recall' },
  { content: '我主要用什么材料?', category: 'recall' },
  { content: '出口哪些市场?', category: 'recall' },
  { content: '好的,记忆力不错', category: 'recall' },
  { content: '有没有什么自动化的工具能帮我管理工厂?', category: 'general' },
  { content: 'ERP系统有什么推荐?', category: 'general' },
  { content: '小工厂用Excel够用吗?', category: 'general' },
  { content: '工人管理有什么好的方法?', category: 'general' },
  { content: '计件工资和计时工资哪个好?', category: 'general' },
  { content: '怎么减少废品率?', category: 'general' },
  { content: '品质管控有什么标准流程?', category: 'general' },
  { content: '出货前要做哪些检测?', category: 'general' },
  { content: '客户投诉怎么处理比较好?', category: 'general' },
  { content: '退货率控制在多少以内算正常?', category: 'general' },
  { content: '好的,今天收获很大,谢谢你的建议', category: 'closing' },
  { content: '下次我想聊聊怎么用AI帮我做市场调研', category: 'closing' },
  { content: '你有什么功能能帮我自动化做一些事情吗?', category: 'closing' },
  { content: '比如每天帮我查ABS价格?', category: 'closing' },
  { content: '能帮我整理供应商信息吗?', category: 'closing' },
  { content: '太好了,下次见!', category: 'closing' },
];

test.describe('Live Multi-Turn Dialogue (Real LLM)', () => {
  // Mark this test as slow
  test.slow();

  test('LIVE-01: 100+ round conversation with real LLM', async ({ page }) => {
    // No mock gateway — use real backend
    await skipOnboarding(page);
    await page.goto(BASE_URL);
    await waitForAppReady(page);

    // Clear previous messages
    await storeInspectors.clearStore(page, STORE_NAMES.CHAT);
    await page.waitForTimeout(1000);

    let successfulRounds = 0;
    let contextRecallPassed = false;
    const errors: string[] = [];

    for (const [i, { content, category }] of CONVERSATION_PLAN.entries()) {
      const round = i + 1;
      try {
        // Find and fill chat input
        const chatInput = page.locator('textarea').first();
        await chatInput.waitFor({ state: 'visible', timeout: 10000 });
        await chatInput.fill(content);
        await clickSend(page);

        // Wait for streaming to complete
        await waitForStreamComplete(page, 120000);
        await page.waitForTimeout(1000); // Small buffer

        successfulRounds++;

        // Check context recall at specific points
        if (category === 'recall' && content.includes('我在哪做玩具')) {
          // The response should mention 澄海 (stated by the user in round 2)
          const state = await storeInspectors.getChatState<ChatStateSnapshot>(page);
          const lastAssistantMsg = [...(state?.messages ?? [])]
            .reverse()
            .find(m => m.role === 'assistant');
          if (lastAssistantMsg?.content?.includes('澄海')) {
            contextRecallPassed = true;
          }
        }

        // Log progress every 10 rounds
        if (round % 10 === 0) {
          console.log(`[Live Test] Completed ${round}/${CONVERSATION_PLAN.length} rounds`);
          await page.screenshot({
            path: `test-results/screenshots/live-round-${round}.png`,
          });
        }
      } catch (err) {
        errors.push(`Round ${round} (${category}): ${err}`);
        // Try to recover
        await page.waitForTimeout(2000);
      }
    }

    // Gather everything the report and the assertions need up front.
    const finalState = await storeInspectors.getChatState<ChatStateSnapshot>(page);
    const userMsgs = finalState?.messages?.filter(m => m.role === 'user') ?? [];

    // NOTE(review): nothing in this file populates window.__consoleErrors; if no
    // fixture/init script installs it, this list is always empty — TODO confirm.
    const consoleErrors = await page.evaluate(() => {
      return (window as any).__consoleErrors ?? [];
    });

    // Report — printed BEFORE the assertions so the per-round error details are
    // still available in the output when one of the expectations below fails.
    console.log(`\n═══ Live Test Report ═══`);
    console.log(`Successful rounds: ${successfulRounds}/${CONVERSATION_PLAN.length}`);
    console.log(`Context recall passed: ${contextRecallPassed}`);
    console.log(`Total messages in store: ${finalState?.messages?.length ?? 0}`);
    console.log(`Errors: ${errors.length}`);
    if (errors.length > 0) {
      console.log('Error details:', errors);
    }
    console.log(`════════════════════════\n`);

    // ═══ Assertions ═══

    // 1. Should complete most rounds
    expect(successfulRounds).toBeGreaterThanOrEqual(
      Math.floor(CONVERSATION_PLAN.length * 0.7)
    );

    // 2. Context recall should work
    if (successfulRounds > 80) {
      expect(contextRecallPassed).toBe(true);
    }

    // 3. Messages should be persisted
    expect(userMsgs.length).toBeGreaterThanOrEqual(
      Math.floor(CONVERSATION_PLAN.length * 0.5)
    );

    // 4. No console errors that crash the app
    expect(consoleErrors.length).toBeLessThan(5);

    // 5. App should still be responsive
    await expect(page.locator('textarea').first()).toBeVisible({ timeout: 5000 });
  });
});