feat(browser-hand): implement Browser Hand UI components
Add complete Browser Hand UI system for browser automation: Components: - BrowserHandCard: Main card with status display and screenshot preview - TaskTemplateModal: Template selection and parameter configuration - ScreenshotPreview: Screenshot display with fullscreen capability Templates: - Basic operations: navigate, screenshot, form fill, click, execute JS - Scraping: text, list, images, links, tables - Automation: login+action, multi-page, monitoring, pagination Features: - 15 built-in task templates across 3 categories - Real-time execution status with progress bar - Screenshot preview with zoom and fullscreen - Integration with HandsPanel for seamless UX - Zustand store for state management - Comprehensive test coverage (16 tests) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
535
desktop/src/components/BrowserHand/templates/scraping.ts
Normal file
535
desktop/src/components/BrowserHand/templates/scraping.ts
Normal file
@@ -0,0 +1,535 @@
|
||||
/**
|
||||
* Scraping Templates for Browser Hand
|
||||
*
|
||||
* Contains data scraping and extraction templates.
|
||||
*/
|
||||
|
||||
import type { TaskTemplate, ExecutionContext } from './types';
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Text
|
||||
// ============================================================================
|
||||
|
||||
const scrapeTextTemplate: TaskTemplate = {
|
||||
id: 'scrape_text',
|
||||
name: '抓取页面文本',
|
||||
description: '从多个选择器提取文本内容',
|
||||
category: 'scraping',
|
||||
icon: 'FileText',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com',
|
||||
},
|
||||
{
|
||||
key: 'selectors',
|
||||
label: '选择器列表',
|
||||
type: 'textarea',
|
||||
required: true,
|
||||
placeholder: '.title\n.description\n.price',
|
||||
description: 'CSS 选择器(每行一个)',
|
||||
},
|
||||
{
|
||||
key: 'waitFor',
|
||||
label: '等待元素',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: '.content',
|
||||
description: '等待此元素出现后再抓取',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const selectorsText = params.selectors as string;
|
||||
const waitFor = params.waitFor as string | undefined;
|
||||
const selectors = selectorsText.split('\n').map((s) => s.trim()).filter(Boolean);
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
if (waitFor) {
|
||||
onProgress('等待页面加载...', 20);
|
||||
onLog('action', `等待元素: ${waitFor}`);
|
||||
await browser.wait(waitFor, 10000);
|
||||
}
|
||||
|
||||
onProgress('正在抓取文本...', 50);
|
||||
const result: Record<string, string | string[]> = {};
|
||||
|
||||
for (let i = 0; i < selectors.length; i++) {
|
||||
const selector = selectors[i];
|
||||
const progress = 50 + Math.floor((i / selectors.length) * 40);
|
||||
|
||||
onProgress(`正在抓取 ${i + 1}/${selectors.length}...`, progress);
|
||||
|
||||
try {
|
||||
// Try to get multiple elements first
|
||||
const multipleResult = await browser.eval(`
|
||||
(selector) => {
|
||||
const elements = document.querySelectorAll(selector);
|
||||
if (elements.length > 1) {
|
||||
return Array.from(elements).map(el => el.textContent?.trim() || '');
|
||||
} else if (elements.length === 1) {
|
||||
return elements[0].textContent?.trim() || '';
|
||||
}
|
||||
return null;
|
||||
}
|
||||
`, [selector]);
|
||||
|
||||
if (multipleResult !== null) {
|
||||
result[selector] = multipleResult as string | string[];
|
||||
onLog('info', `抓取成功: ${selector}`);
|
||||
} else {
|
||||
result[selector] = '';
|
||||
onLog('warn', `未找到元素: ${selector}`);
|
||||
}
|
||||
} catch (error) {
|
||||
result[selector] = '';
|
||||
onLog('error', `抓取失败: ${selector}`, { error: String(error) });
|
||||
}
|
||||
}
|
||||
|
||||
onProgress('完成', 100);
|
||||
return { url: await browser.url(), data: result };
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape List
|
||||
// ============================================================================
|
||||
|
||||
const scrapeListTemplate: TaskTemplate = {
|
||||
id: 'scrape_list',
|
||||
name: '提取列表数据',
|
||||
description: '从重复元素中批量提取结构化数据',
|
||||
category: 'scraping',
|
||||
icon: 'List',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/products',
|
||||
},
|
||||
{
|
||||
key: 'itemSelector',
|
||||
label: '列表项选择器',
|
||||
type: 'text',
|
||||
required: true,
|
||||
placeholder: '.product-item',
|
||||
description: '每个列表项的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'fieldMappings',
|
||||
label: '字段映射',
|
||||
type: 'json',
|
||||
required: true,
|
||||
default: {},
|
||||
description: 'JSON 对象,映射字段名到选择器',
|
||||
placeholder: '{"title": ".title", "price": ".price", "link": "a@href"}',
|
||||
},
|
||||
{
|
||||
key: 'limit',
|
||||
label: '最大数量',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 50,
|
||||
min: 1,
|
||||
max: 500,
|
||||
description: '最多提取多少条数据',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const itemSelector = params.itemSelector as string;
|
||||
const fieldMappings = params.fieldMappings as Record<string, string>;
|
||||
const limit = (params.limit as number) ?? 50;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('等待列表加载...', 30);
|
||||
await browser.wait(itemSelector, 10000);
|
||||
|
||||
onProgress('正在提取列表数据...', 50);
|
||||
|
||||
const scrapingScript = `
|
||||
({ itemSelector, fieldMappings, limit }) => {
|
||||
const items = document.querySelectorAll(itemSelector);
|
||||
const results = [];
|
||||
|
||||
for (let i = 0; i < Math.min(items.length, limit); i++) {
|
||||
const item = items[i];
|
||||
const row = {};
|
||||
|
||||
for (const [field, selector] of Object.entries(fieldMappings)) {
|
||||
// Handle attribute selectors like "a@href"
|
||||
const parts = selector.split('@');
|
||||
const cssSelector = parts[0];
|
||||
const attr = parts[1];
|
||||
|
||||
const el = item.querySelector(cssSelector);
|
||||
if (el) {
|
||||
if (attr) {
|
||||
row[field] = el.getAttribute(attr) || '';
|
||||
} else {
|
||||
row[field] = el.textContent?.trim() || '';
|
||||
}
|
||||
} else {
|
||||
row[field] = '';
|
||||
}
|
||||
}
|
||||
|
||||
results.push(row);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(scrapingScript, [{
|
||||
itemSelector,
|
||||
fieldMappings,
|
||||
limit,
|
||||
}]);
|
||||
|
||||
const items = result as Array<Record<string, string>>;
|
||||
onLog('info', `提取了 ${items.length} 条数据`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: items.length,
|
||||
data: items,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Images
|
||||
// ============================================================================
|
||||
|
||||
const scrapeImagesTemplate: TaskTemplate = {
|
||||
id: 'scrape_images',
|
||||
name: '抓取图片列表',
|
||||
description: '提取页面中的图片 URL',
|
||||
category: 'scraping',
|
||||
icon: 'Image',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/gallery',
|
||||
},
|
||||
{
|
||||
key: 'imageSelector',
|
||||
label: '图片选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'img',
|
||||
placeholder: 'img.gallery-image',
|
||||
description: '图片元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'minWidth',
|
||||
label: '最小宽度',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 100,
|
||||
description: '忽略小于此宽度的图片',
|
||||
},
|
||||
{
|
||||
key: 'minHeight',
|
||||
label: '最小高度',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 100,
|
||||
description: '忽略小于此高度的图片',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const imageSelector = (params.imageSelector as string) ?? 'img';
|
||||
const minWidth = (params.minWidth as number) ?? 100;
|
||||
const minHeight = (params.minHeight as number) ?? 100;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取图片...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ imageSelector, minWidth, minHeight }) => {
|
||||
const images = document.querySelectorAll(imageSelector);
|
||||
const results = [];
|
||||
|
||||
images.forEach(img => {
|
||||
const width = img.naturalWidth || img.width;
|
||||
const height = img.naturalHeight || img.height;
|
||||
|
||||
if (width >= minWidth && height >= minHeight) {
|
||||
results.push({
|
||||
src: img.src,
|
||||
alt: img.alt || '',
|
||||
width,
|
||||
height,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{
|
||||
imageSelector,
|
||||
minWidth,
|
||||
minHeight,
|
||||
}]);
|
||||
|
||||
const images = result as Array<{
|
||||
src: string;
|
||||
alt: string;
|
||||
width: number;
|
||||
height: number;
|
||||
}>;
|
||||
|
||||
onLog('info', `找到 ${images.length} 张图片`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: images.length,
|
||||
images,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Links
|
||||
// ============================================================================
|
||||
|
||||
const scrapeLinksTemplate: TaskTemplate = {
|
||||
id: 'scrape_links',
|
||||
name: '抓取链接列表',
|
||||
description: '提取页面中的所有链接',
|
||||
category: 'scraping',
|
||||
icon: 'Link',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com',
|
||||
},
|
||||
{
|
||||
key: 'linkSelector',
|
||||
label: '链接选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'a[href]',
|
||||
placeholder: 'a[href]',
|
||||
description: '链接元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'filterPattern',
|
||||
label: 'URL 过滤',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: 'example.com',
|
||||
description: '只保留包含此文本的链接',
|
||||
},
|
||||
{
|
||||
key: 'excludePattern',
|
||||
label: '排除模式',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: '#, javascript:',
|
||||
description: '排除包含此文本的链接',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const linkSelector = (params.linkSelector as string) ?? 'a[href]';
|
||||
const filterPattern = params.filterPattern as string | undefined;
|
||||
const excludePattern = params.excludePattern as string | undefined;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取链接...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ linkSelector, filterPattern, excludePattern }) => {
|
||||
const links = document.querySelectorAll(linkSelector);
|
||||
const results = [];
|
||||
const seen = new Set();
|
||||
|
||||
links.forEach(a => {
|
||||
const href = a.href;
|
||||
const text = a.textContent?.trim() || '';
|
||||
|
||||
if (!href || seen.has(href)) return;
|
||||
|
||||
// Apply filter
|
||||
if (filterPattern && !href.includes(filterPattern) && !text.includes(filterPattern)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Apply exclude
|
||||
if (excludePattern) {
|
||||
const patterns = excludePattern.split(',').map(p => p.trim());
|
||||
for (const p of patterns) {
|
||||
if (href.includes(p)) return;
|
||||
}
|
||||
}
|
||||
|
||||
seen.add(href);
|
||||
results.push({ href, text });
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{
|
||||
linkSelector,
|
||||
filterPattern,
|
||||
excludePattern,
|
||||
}]);
|
||||
|
||||
const links = result as Array<{ href: string; text: string }>;
|
||||
onLog('info', `找到 ${links.length} 个链接`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: links.length,
|
||||
links,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Table
|
||||
// ============================================================================
|
||||
|
||||
const scrapeTableTemplate: TaskTemplate = {
|
||||
id: 'scrape_table',
|
||||
name: '抓取表格数据',
|
||||
description: '从 HTML 表格中提取数据',
|
||||
category: 'scraping',
|
||||
icon: 'Table',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/data',
|
||||
},
|
||||
{
|
||||
key: 'tableSelector',
|
||||
label: '表格选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'table',
|
||||
placeholder: 'table.data-table',
|
||||
description: '表格元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'headerRow',
|
||||
label: '表头行',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 1,
|
||||
min: 0,
|
||||
max: 10,
|
||||
description: '表头所在行(0 表示无表头)',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const tableSelector = (params.tableSelector as string) ?? 'table';
|
||||
const headerRow = (params.headerRow as number) ?? 1;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取表格数据...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ tableSelector, headerRow }) => {
|
||||
const table = document.querySelector(tableSelector);
|
||||
if (!table) return { headers: [], rows: [] };
|
||||
|
||||
const allRows = table.querySelectorAll('tr');
|
||||
|
||||
// Extract headers
|
||||
let headers = [];
|
||||
if (headerRow > 0 && allRows[headerRow - 1]) {
|
||||
const headerCells = allRows[headerRow - 1].querySelectorAll('th, td');
|
||||
headers = Array.from(headerCells).map(cell => cell.textContent?.trim() || '');
|
||||
}
|
||||
|
||||
// Extract data rows
|
||||
const startRow = headerRow > 0 ? headerRow : 0;
|
||||
const rows = [];
|
||||
|
||||
for (let i = startRow; i < allRows.length; i++) {
|
||||
const cells = allRows[i].querySelectorAll('td, th');
|
||||
const rowData = Array.from(cells).map(cell => cell.textContent?.trim() || '');
|
||||
if (rowData.some(d => d)) { // Skip empty rows
|
||||
rows.push(rowData);
|
||||
}
|
||||
}
|
||||
|
||||
return { headers, rows };
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{ tableSelector, headerRow }]) as {
|
||||
headers: string[];
|
||||
rows: string[][];
|
||||
};
|
||||
|
||||
onLog('info', `提取了 ${result.rows.length} 行数据,${result.headers.length} 列`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
headers: result.headers,
|
||||
rowCount: result.rows.length,
|
||||
data: result.rows,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
// Export All Scraping Templates
// ============================================================================

// All scraping-category templates, in the order they appear in the
// template picker UI.
export const scrapingTemplates: TaskTemplate[] = [
  scrapeTextTemplate,
  scrapeListTemplate,
  scrapeImagesTemplate,
  scrapeLinksTemplate,
  scrapeTableTemplate,
];
|
||||
Reference in New Issue
Block a user