feat(browser-hand): implement Browser Hand UI components
Add complete Browser Hand UI system for browser automation: Components: - BrowserHandCard: Main card with status display and screenshot preview - TaskTemplateModal: Template selection and parameter configuration - ScreenshotPreview: Screenshot display with fullscreen capability Templates: - Basic operations: navigate, screenshot, form fill, click, execute JS - Scraping: text, list, images, links, tables - Automation: login+action, multi-page, monitoring, pagination Features: - 15 built-in task templates across 3 categories - Real-time execution status with progress bar - Screenshot preview with zoom and fullscreen - Integration with HandsPanel for seamless UX - Zustand store for state management - Comprehensive test coverage (16 tests) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
535
desktop/src/components/BrowserHand/templates/scraping.ts
Normal file
535
desktop/src/components/BrowserHand/templates/scraping.ts
Normal file
@@ -0,0 +1,535 @@
|
||||
/**
|
||||
* Scraping Templates for Browser Hand
|
||||
*
|
||||
* Contains data scraping and extraction templates.
|
||||
*/
|
||||
|
||||
import type { TaskTemplate, ExecutionContext } from './types';
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Text
|
||||
// ============================================================================
|
||||
|
||||
const scrapeTextTemplate: TaskTemplate = {
|
||||
id: 'scrape_text',
|
||||
name: '抓取页面文本',
|
||||
description: '从多个选择器提取文本内容',
|
||||
category: 'scraping',
|
||||
icon: 'FileText',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com',
|
||||
},
|
||||
{
|
||||
key: 'selectors',
|
||||
label: '选择器列表',
|
||||
type: 'textarea',
|
||||
required: true,
|
||||
placeholder: '.title\n.description\n.price',
|
||||
description: 'CSS 选择器(每行一个)',
|
||||
},
|
||||
{
|
||||
key: 'waitFor',
|
||||
label: '等待元素',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: '.content',
|
||||
description: '等待此元素出现后再抓取',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const selectorsText = params.selectors as string;
|
||||
const waitFor = params.waitFor as string | undefined;
|
||||
const selectors = selectorsText.split('\n').map((s) => s.trim()).filter(Boolean);
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
if (waitFor) {
|
||||
onProgress('等待页面加载...', 20);
|
||||
onLog('action', `等待元素: ${waitFor}`);
|
||||
await browser.wait(waitFor, 10000);
|
||||
}
|
||||
|
||||
onProgress('正在抓取文本...', 50);
|
||||
const result: Record<string, string | string[]> = {};
|
||||
|
||||
for (let i = 0; i < selectors.length; i++) {
|
||||
const selector = selectors[i];
|
||||
const progress = 50 + Math.floor((i / selectors.length) * 40);
|
||||
|
||||
onProgress(`正在抓取 ${i + 1}/${selectors.length}...`, progress);
|
||||
|
||||
try {
|
||||
// Try to get multiple elements first
|
||||
const multipleResult = await browser.eval(`
|
||||
(selector) => {
|
||||
const elements = document.querySelectorAll(selector);
|
||||
if (elements.length > 1) {
|
||||
return Array.from(elements).map(el => el.textContent?.trim() || '');
|
||||
} else if (elements.length === 1) {
|
||||
return elements[0].textContent?.trim() || '';
|
||||
}
|
||||
return null;
|
||||
}
|
||||
`, [selector]);
|
||||
|
||||
if (multipleResult !== null) {
|
||||
result[selector] = multipleResult as string | string[];
|
||||
onLog('info', `抓取成功: ${selector}`);
|
||||
} else {
|
||||
result[selector] = '';
|
||||
onLog('warn', `未找到元素: ${selector}`);
|
||||
}
|
||||
} catch (error) {
|
||||
result[selector] = '';
|
||||
onLog('error', `抓取失败: ${selector}`, { error: String(error) });
|
||||
}
|
||||
}
|
||||
|
||||
onProgress('完成', 100);
|
||||
return { url: await browser.url(), data: result };
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape List
|
||||
// ============================================================================
|
||||
|
||||
const scrapeListTemplate: TaskTemplate = {
|
||||
id: 'scrape_list',
|
||||
name: '提取列表数据',
|
||||
description: '从重复元素中批量提取结构化数据',
|
||||
category: 'scraping',
|
||||
icon: 'List',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/products',
|
||||
},
|
||||
{
|
||||
key: 'itemSelector',
|
||||
label: '列表项选择器',
|
||||
type: 'text',
|
||||
required: true,
|
||||
placeholder: '.product-item',
|
||||
description: '每个列表项的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'fieldMappings',
|
||||
label: '字段映射',
|
||||
type: 'json',
|
||||
required: true,
|
||||
default: {},
|
||||
description: 'JSON 对象,映射字段名到选择器',
|
||||
placeholder: '{"title": ".title", "price": ".price", "link": "a@href"}',
|
||||
},
|
||||
{
|
||||
key: 'limit',
|
||||
label: '最大数量',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 50,
|
||||
min: 1,
|
||||
max: 500,
|
||||
description: '最多提取多少条数据',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const itemSelector = params.itemSelector as string;
|
||||
const fieldMappings = params.fieldMappings as Record<string, string>;
|
||||
const limit = (params.limit as number) ?? 50;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('等待列表加载...', 30);
|
||||
await browser.wait(itemSelector, 10000);
|
||||
|
||||
onProgress('正在提取列表数据...', 50);
|
||||
|
||||
const scrapingScript = `
|
||||
({ itemSelector, fieldMappings, limit }) => {
|
||||
const items = document.querySelectorAll(itemSelector);
|
||||
const results = [];
|
||||
|
||||
for (let i = 0; i < Math.min(items.length, limit); i++) {
|
||||
const item = items[i];
|
||||
const row = {};
|
||||
|
||||
for (const [field, selector] of Object.entries(fieldMappings)) {
|
||||
// Handle attribute selectors like "a@href"
|
||||
const parts = selector.split('@');
|
||||
const cssSelector = parts[0];
|
||||
const attr = parts[1];
|
||||
|
||||
const el = item.querySelector(cssSelector);
|
||||
if (el) {
|
||||
if (attr) {
|
||||
row[field] = el.getAttribute(attr) || '';
|
||||
} else {
|
||||
row[field] = el.textContent?.trim() || '';
|
||||
}
|
||||
} else {
|
||||
row[field] = '';
|
||||
}
|
||||
}
|
||||
|
||||
results.push(row);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(scrapingScript, [{
|
||||
itemSelector,
|
||||
fieldMappings,
|
||||
limit,
|
||||
}]);
|
||||
|
||||
const items = result as Array<Record<string, string>>;
|
||||
onLog('info', `提取了 ${items.length} 条数据`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: items.length,
|
||||
data: items,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Images
|
||||
// ============================================================================
|
||||
|
||||
const scrapeImagesTemplate: TaskTemplate = {
|
||||
id: 'scrape_images',
|
||||
name: '抓取图片列表',
|
||||
description: '提取页面中的图片 URL',
|
||||
category: 'scraping',
|
||||
icon: 'Image',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/gallery',
|
||||
},
|
||||
{
|
||||
key: 'imageSelector',
|
||||
label: '图片选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'img',
|
||||
placeholder: 'img.gallery-image',
|
||||
description: '图片元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'minWidth',
|
||||
label: '最小宽度',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 100,
|
||||
description: '忽略小于此宽度的图片',
|
||||
},
|
||||
{
|
||||
key: 'minHeight',
|
||||
label: '最小高度',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 100,
|
||||
description: '忽略小于此高度的图片',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const imageSelector = (params.imageSelector as string) ?? 'img';
|
||||
const minWidth = (params.minWidth as number) ?? 100;
|
||||
const minHeight = (params.minHeight as number) ?? 100;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取图片...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ imageSelector, minWidth, minHeight }) => {
|
||||
const images = document.querySelectorAll(imageSelector);
|
||||
const results = [];
|
||||
|
||||
images.forEach(img => {
|
||||
const width = img.naturalWidth || img.width;
|
||||
const height = img.naturalHeight || img.height;
|
||||
|
||||
if (width >= minWidth && height >= minHeight) {
|
||||
results.push({
|
||||
src: img.src,
|
||||
alt: img.alt || '',
|
||||
width,
|
||||
height,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{
|
||||
imageSelector,
|
||||
minWidth,
|
||||
minHeight,
|
||||
}]);
|
||||
|
||||
const images = result as Array<{
|
||||
src: string;
|
||||
alt: string;
|
||||
width: number;
|
||||
height: number;
|
||||
}>;
|
||||
|
||||
onLog('info', `找到 ${images.length} 张图片`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: images.length,
|
||||
images,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Links
|
||||
// ============================================================================
|
||||
|
||||
const scrapeLinksTemplate: TaskTemplate = {
|
||||
id: 'scrape_links',
|
||||
name: '抓取链接列表',
|
||||
description: '提取页面中的所有链接',
|
||||
category: 'scraping',
|
||||
icon: 'Link',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com',
|
||||
},
|
||||
{
|
||||
key: 'linkSelector',
|
||||
label: '链接选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'a[href]',
|
||||
placeholder: 'a[href]',
|
||||
description: '链接元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'filterPattern',
|
||||
label: 'URL 过滤',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: 'example.com',
|
||||
description: '只保留包含此文本的链接',
|
||||
},
|
||||
{
|
||||
key: 'excludePattern',
|
||||
label: '排除模式',
|
||||
type: 'text',
|
||||
required: false,
|
||||
placeholder: '#, javascript:',
|
||||
description: '排除包含此文本的链接',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const linkSelector = (params.linkSelector as string) ?? 'a[href]';
|
||||
const filterPattern = params.filterPattern as string | undefined;
|
||||
const excludePattern = params.excludePattern as string | undefined;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取链接...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ linkSelector, filterPattern, excludePattern }) => {
|
||||
const links = document.querySelectorAll(linkSelector);
|
||||
const results = [];
|
||||
const seen = new Set();
|
||||
|
||||
links.forEach(a => {
|
||||
const href = a.href;
|
||||
const text = a.textContent?.trim() || '';
|
||||
|
||||
if (!href || seen.has(href)) return;
|
||||
|
||||
// Apply filter
|
||||
if (filterPattern && !href.includes(filterPattern) && !text.includes(filterPattern)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Apply exclude
|
||||
if (excludePattern) {
|
||||
const patterns = excludePattern.split(',').map(p => p.trim());
|
||||
for (const p of patterns) {
|
||||
if (href.includes(p)) return;
|
||||
}
|
||||
}
|
||||
|
||||
seen.add(href);
|
||||
results.push({ href, text });
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{
|
||||
linkSelector,
|
||||
filterPattern,
|
||||
excludePattern,
|
||||
}]);
|
||||
|
||||
const links = result as Array<{ href: string; text: string }>;
|
||||
onLog('info', `找到 ${links.length} 个链接`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
count: links.length,
|
||||
links,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Template: Scrape Table
|
||||
// ============================================================================
|
||||
|
||||
const scrapeTableTemplate: TaskTemplate = {
|
||||
id: 'scrape_table',
|
||||
name: '抓取表格数据',
|
||||
description: '从 HTML 表格中提取数据',
|
||||
category: 'scraping',
|
||||
icon: 'Table',
|
||||
params: [
|
||||
{
|
||||
key: 'url',
|
||||
label: '网页地址',
|
||||
type: 'url',
|
||||
required: true,
|
||||
placeholder: 'https://example.com/data',
|
||||
},
|
||||
{
|
||||
key: 'tableSelector',
|
||||
label: '表格选择器',
|
||||
type: 'text',
|
||||
required: false,
|
||||
default: 'table',
|
||||
placeholder: 'table.data-table',
|
||||
description: '表格元素的 CSS 选择器',
|
||||
},
|
||||
{
|
||||
key: 'headerRow',
|
||||
label: '表头行',
|
||||
type: 'number',
|
||||
required: false,
|
||||
default: 1,
|
||||
min: 0,
|
||||
max: 10,
|
||||
description: '表头所在行(0 表示无表头)',
|
||||
},
|
||||
],
|
||||
execute: async (params, context: ExecutionContext) => {
|
||||
const { browser, onProgress, onLog } = context;
|
||||
const url = params.url as string;
|
||||
const tableSelector = (params.tableSelector as string) ?? 'table';
|
||||
const headerRow = (params.headerRow as number) ?? 1;
|
||||
|
||||
onProgress('正在导航到页面...', 0);
|
||||
onLog('info', `访问: ${url}`);
|
||||
await browser.goto(url);
|
||||
|
||||
onProgress('正在提取表格数据...', 50);
|
||||
|
||||
const extractScript = `
|
||||
({ tableSelector, headerRow }) => {
|
||||
const table = document.querySelector(tableSelector);
|
||||
if (!table) return { headers: [], rows: [] };
|
||||
|
||||
const allRows = table.querySelectorAll('tr');
|
||||
|
||||
// Extract headers
|
||||
let headers = [];
|
||||
if (headerRow > 0 && allRows[headerRow - 1]) {
|
||||
const headerCells = allRows[headerRow - 1].querySelectorAll('th, td');
|
||||
headers = Array.from(headerCells).map(cell => cell.textContent?.trim() || '');
|
||||
}
|
||||
|
||||
// Extract data rows
|
||||
const startRow = headerRow > 0 ? headerRow : 0;
|
||||
const rows = [];
|
||||
|
||||
for (let i = startRow; i < allRows.length; i++) {
|
||||
const cells = allRows[i].querySelectorAll('td, th');
|
||||
const rowData = Array.from(cells).map(cell => cell.textContent?.trim() || '');
|
||||
if (rowData.some(d => d)) { // Skip empty rows
|
||||
rows.push(rowData);
|
||||
}
|
||||
}
|
||||
|
||||
return { headers, rows };
|
||||
}
|
||||
`;
|
||||
|
||||
const result = await browser.eval(extractScript, [{ tableSelector, headerRow }]) as {
|
||||
headers: string[];
|
||||
rows: string[][];
|
||||
};
|
||||
|
||||
onLog('info', `提取了 ${result.rows.length} 行数据,${result.headers.length} 列`);
|
||||
|
||||
onProgress('完成', 100);
|
||||
return {
|
||||
url: await browser.url(),
|
||||
headers: result.headers,
|
||||
rowCount: result.rows.length,
|
||||
data: result.rows,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
// Export All Scraping Templates
// ============================================================================

// All scraping-category templates, in the order they appear in the
// template picker UI.
export const scrapingTemplates: TaskTemplate[] = [
  scrapeTextTemplate,
  scrapeListTemplate,
  scrapeImagesTemplate,
  scrapeLinksTemplate,
  scrapeTableTemplate,
];
|
||||
Reference in New Issue
Block a user