Files
zclaw_openfang/desktop/src/lib/llm-service.ts
iven 02a4ba5e75
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(desktop): 替换 require() 为 ES import — 修复生产构建崩溃
- connectionStore: 2 处 require() → loadConversationStore() 异步预加载 + 闭包引用
- saasStore: 1 处 require() → await import()(logout 是 async)
- llm-service: 1 处 require() → 顶层 import(无循环依赖)
- streamStore: 移除重复动态导入,统一使用顶层 useConnectionStore
- tsc --noEmit 0 errors
2026-04-15 00:47:29 +08:00

879 lines
25 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* LLM Service Adapter - Unified LLM interface for L4 self-evolution engines
*
* Provides a unified interface for:
* - ReflectionEngine: Semantic analysis + deep reflection
* - ContextCompactor: High-quality summarization
* - MemoryExtractor: Semantic importance scoring
*
* Supports multiple backends:
* - OpenAI (GPT-4, GPT-3.5)
* - Volcengine (Doubao)
* - ZCLAW Gateway (passthrough)
*
* Part of ZCLAW L4 Self-Evolution capability.
*/
import { DEFAULT_MODEL_ID, DEFAULT_OPENAI_BASE_URL } from '../constants/models';
import { createLogger } from './logger';
import { recordLLMUsage } from './telemetry-collector';
const log = createLogger('LLMService');
// === Types ===
/** Identifies which backend an adapter talks to. 'mock' is for tests/SSR. */
export type LLMProvider = 'openai' | 'volcengine' | 'gateway' | 'saas' | 'mock';
/** Runtime settings for one adapter instance; unset fields fall back to DEFAULT_CONFIGS. */
export interface LLMConfig {
  provider: LLMProvider;
  model?: string;
  // Never persisted to localStorage (saveConfig strips it for security).
  apiKey?: string;
  apiBase?: string;
  maxTokens?: number;
  temperature?: number;
  // Request timeout in milliseconds (enforced via AbortSignal.timeout).
  timeout?: number;
}
/** One chat turn in OpenAI-style role/content form. */
export interface LLMMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
/** Normalized completion result returned by every adapter. */
export interface LLMResponse {
  content: string;
  tokensUsed?: {
    input: number;
    output: number;
  };
  model?: string;
  latencyMs?: number;
}
/** Contract implemented by every backend adapter in this file. */
export interface LLMServiceAdapter {
  complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse>;
  // Synchronous, cheap check — must not perform I/O.
  isAvailable(): boolean;
  getProvider(): LLMProvider;
}
// === Default Configs ===
/**
 * Per-provider baseline settings. Adapter constructors spread the caller's
 * config over these, so any field can be overridden at construction time.
 */
const DEFAULT_CONFIGS: Record<LLMProvider, LLMConfig> = {
  openai: {
    provider: 'openai',
    model: DEFAULT_MODEL_ID,
    apiBase: DEFAULT_OPENAI_BASE_URL,
    maxTokens: 2000,
    temperature: 0.7,
    timeout: 30000,
  },
  volcengine: {
    provider: 'volcengine',
    model: 'doubao-pro-32k',
    apiBase: 'https://ark.cn-beijing.volces.com/api/v3',
    maxTokens: 2000,
    temperature: 0.7,
    timeout: 30000,
  },
  gateway: {
    provider: 'gateway',
    apiBase: '/api/llm',
    maxTokens: 2000,
    temperature: 0.7,
    // Gateway may proxy to a slower kernel/agent — allow a longer timeout.
    timeout: 60000,
  },
  saas: {
    provider: 'saas',
    maxTokens: 4096,
    temperature: 0.7,
    timeout: 300000, // 5 min for streaming
  },
  mock: {
    provider: 'mock',
    maxTokens: 100,
    temperature: 0,
    timeout: 100,
  },
};
// === Storage ===
// localStorage key under which the (apiKey-stripped) LLMConfig is persisted.
const LLM_CONFIG_KEY = 'zclaw-llm-config';
// === Mock Adapter (for testing) ===
/**
 * Offline adapter returning canned responses keyed off the last message's
 * content. Always available; used by tests and as the SSR/unknown-provider
 * fallback in the factory.
 */
class MockLLMAdapter implements LLMServiceAdapter {
  constructor(_config: LLMConfig) {
    // Config intentionally unused for now (reserved for future mock tuning).
  }

  /** Pick the canned payload appended after the mock prefix. */
  private buildPayload(prompt: string): string {
    if (prompt.includes('reflect') || prompt.includes('反思')) {
      return JSON.stringify({
        patterns: [
          {
            observation: '用户经常询问代码优化相关问题',
            frequency: 5,
            sentiment: 'positive',
            evidence: ['多次讨论性能优化', '关注代码质量'],
          },
        ],
        improvements: [
          {
            area: '代码解释',
            suggestion: '可以提供更详细的代码注释',
            priority: 'medium',
          },
        ],
        identityProposals: [],
      });
    }
    if (prompt.includes('summarize') || prompt.includes('摘要')) {
      return '这是一个关于对话内容的摘要,包含了主要讨论的要点和结论。';
    }
    if (prompt.includes('importance') || prompt.includes('重要性')) {
      return JSON.stringify({
        memories: [
          { content: '用户偏好简洁的回答', importance: 7, type: 'preference' },
        ],
      });
    }
    return 'Processed: ' + prompt.slice(0, 50);
  }

  async complete(messages: LLMMessage[]): Promise<LLMResponse> {
    // Simulate a small network latency.
    await new Promise((resolve) => setTimeout(resolve, 50));
    const prompt = messages[messages.length - 1]?.content || '';
    const body = '[Mock LLM Response] ' + this.buildPayload(prompt);
    return {
      content: body,
      // Rough 4-chars-per-token estimate, matching real-adapter units.
      tokensUsed: { input: prompt.length / 4, output: body.length / 4 },
      model: 'mock-model',
      latencyMs: 50,
    };
  }

  isAvailable(): boolean {
    return true;
  }

  getProvider(): LLMProvider {
    return 'mock';
  }
}
// === OpenAI Adapter ===
/**
 * Direct OpenAI-compatible Chat Completions adapter.
 * Requires an API key; model/base URL default to DEFAULT_CONFIGS.openai.
 */
class OpenAILLMAdapter implements LLMServiceAdapter {
  private config: LLMConfig;

  constructor(config: LLMConfig) {
    this.config = { ...DEFAULT_CONFIGS.openai, ...config };
  }

  /**
   * Send a non-streaming chat completion.
   * @param messages Chat turns forwarded verbatim to the API.
   * @param options Per-call overrides merged over the instance config.
   * @throws Error when no API key is configured or the HTTP call fails.
   */
  async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
    const config = { ...this.config, ...options };
    const startTime = Date.now();
    if (!config.apiKey) {
      throw new Error('[OpenAI] API key not configured');
    }
    const response = await fetch(`${config.apiBase}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${config.apiKey}`,
      },
      body: JSON.stringify({
        model: config.model,
        messages,
        max_tokens: config.maxTokens,
        temperature: config.temperature,
      }),
      signal: AbortSignal.timeout(config.timeout || 30000),
    });
    if (!response.ok) {
      const errorBody = await response.text();
      // Log full error in development only
      if (import.meta.env.DEV) {
        console.error('[OpenAI] API error:', errorBody);
      }
      // Return sanitized error to caller (body may echo request details)
      throw new Error(`[OpenAI] API error: ${response.status} - Request failed`);
    }
    const data = await response.json();
    const latencyMs = Date.now() - startTime;
    return {
      // Fix: guard `choices` itself — a malformed 200 response without a
      // choices array previously threw a TypeError here. Now consistent
      // with the SaaS adapter, which already uses `data.choices?.[0]`.
      content: data.choices?.[0]?.message?.content || '',
      tokensUsed: {
        input: data.usage?.prompt_tokens || 0,
        output: data.usage?.completion_tokens || 0,
      },
      model: data.model,
      latencyMs,
    };
  }

  isAvailable(): boolean {
    return !!this.config.apiKey;
  }

  getProvider(): LLMProvider {
    return 'openai';
  }
}
// === Volcengine Adapter ===
/**
 * Volcengine (Doubao) adapter — OpenAI-compatible wire format against the
 * Ark endpoint. Requires an API key.
 */
class VolcengineLLMAdapter implements LLMServiceAdapter {
  private config: LLMConfig;

  constructor(config: LLMConfig) {
    this.config = { ...DEFAULT_CONFIGS.volcengine, ...config };
  }

  /**
   * Send a non-streaming chat completion.
   * @param messages Chat turns forwarded verbatim to the API.
   * @param options Per-call overrides merged over the instance config.
   * @throws Error when no API key is configured or the HTTP call fails.
   */
  async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
    const config = { ...this.config, ...options };
    const startTime = Date.now();
    if (!config.apiKey) {
      throw new Error('[Volcengine] API key not configured');
    }
    const response = await fetch(`${config.apiBase}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${config.apiKey}`,
      },
      body: JSON.stringify({
        model: config.model,
        messages,
        max_tokens: config.maxTokens,
        temperature: config.temperature,
      }),
      signal: AbortSignal.timeout(config.timeout || 30000),
    });
    if (!response.ok) {
      const errorBody = await response.text();
      // Log full error in development only
      if (import.meta.env.DEV) {
        console.error('[Volcengine] API error:', errorBody);
      }
      // Return sanitized error to caller (body may echo request details)
      throw new Error(`[Volcengine] API error: ${response.status} - Request failed`);
    }
    const data = await response.json();
    const latencyMs = Date.now() - startTime;
    return {
      // Fix: guard `choices` itself — a malformed 200 response without a
      // choices array previously threw a TypeError here. Now consistent
      // with the SaaS adapter, which already uses `data.choices?.[0]`.
      content: data.choices?.[0]?.message?.content || '',
      tokensUsed: {
        input: data.usage?.prompt_tokens || 0,
        output: data.usage?.completion_tokens || 0,
      },
      model: data.model,
      latencyMs,
    };
  }

  isAvailable(): boolean {
    return !!this.config.apiKey;
  }

  getProvider(): LLMProvider {
    return 'volcengine';
  }
}
// === Gateway Adapter (pass through to ZCLAW or internal Kernel) ===
/**
 * Routes completions through ZCLAW itself: the embedded kernel via Tauri
 * invoke when running in the desktop app, otherwise the web gateway's agent
 * HTTP endpoints. No API key needed — auth is handled by the host.
 */
class GatewayLLMAdapter implements LLMServiceAdapter {
  private config: LLMConfig;

  constructor(config: LLMConfig) {
    this.config = { ...DEFAULT_CONFIGS.gateway, ...config };
  }

  /**
   * Send a completion request.
   *
   * Messages are flattened to a single prompt string because both the kernel
   * command and the gateway endpoints accept one `message`, not a chat array.
   * Only the FIRST system and FIRST user message are used — assumes callers
   * send at most one of each (true for the llm* helpers in this file);
   * TODO(review): confirm for any other callers.
   *
   * @throws Error when the kernel invoke fails, or both HTTP endpoints fail.
   */
  async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
    const config = { ...this.config, ...options };
    const startTime = Date.now();
    // Build a single prompt from messages
    const systemMessage = messages.find(m => m.role === 'system')?.content || '';
    const userMessage = messages.find(m => m.role === 'user')?.content || '';
    // Combine system and user messages into a single prompt
    const fullPrompt = systemMessage
      ? `${systemMessage}\n\n${userMessage}`
      : userMessage;
    // Check if running in Tauri with internal kernel
    // Use the same detection as kernel-client.ts
    const isTauri = typeof window !== 'undefined' &&
      '__TAURI_INTERNALS__' in window;
    if (isTauri) {
      // Use internal Kernel via Tauri invoke
      try {
        // Dynamic import keeps the Tauri API out of pure-web bundles.
        const { invoke } = await import('@tauri-apps/api/core');
        // Get the default agent ID from connectionStore or use the first agent
        const agentId = localStorage.getItem('zclaw-default-agent-id');
        const response = await invoke<{ content: string; input_tokens: number; output_tokens: number }>('agent_chat', {
          request: {
            agentId: agentId || null, // null will use default agent
            message: fullPrompt,
          },
        });
        const latencyMs = Date.now() - startTime;
        return {
          content: response.content || '',
          tokensUsed: {
            input: response.input_tokens || 0,
            output: response.output_tokens || 0,
          },
          latencyMs,
        };
      } catch (err) {
        console.error('[LLMService] Kernel chat failed:', err);
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`[Gateway] Kernel chat failed: ${message}`);
      }
    }
    // External Gateway mode: Use ZCLAW's chat endpoint
    const agentId = localStorage.getItem('zclaw-default-agent-id') || 'default';
    const response = await fetch(`/api/agents/${agentId}/message`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        message: fullPrompt,
        max_tokens: config.maxTokens,
        temperature: config.temperature ?? 0.3, // Lower temperature for extraction tasks
      }),
      signal: AbortSignal.timeout(config.timeout || 60000),
    });
    if (!response.ok) {
      const error = await response.text();
      // If agent not found, try without agent ID (direct /api/chat)
      if (response.status === 404) {
        const fallbackResponse = await fetch('/api/chat', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            message: fullPrompt,
            max_tokens: config.maxTokens,
            temperature: config.temperature ?? 0.3,
          }),
          signal: AbortSignal.timeout(config.timeout || 60000),
        });
        if (!fallbackResponse.ok) {
          throw new Error(`[Gateway] Both endpoints failed: ${fallbackResponse.status}`);
        }
        const data = await fallbackResponse.json();
        const latencyMs = Date.now() - startTime;
        // Gateway responses vary: some return `response`, some `content`.
        return {
          content: data.response || data.content || '',
          tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 },
          latencyMs,
        };
      }
      throw new Error(`[Gateway] API error: ${response.status} - ${error}`);
    }
    const data = await response.json();
    const latencyMs = Date.now() - startTime;
    // Gateway responses vary: some return `response`, some `content`.
    return {
      content: data.response || data.content || '',
      tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 },
      latencyMs,
    };
  }

  isAvailable(): boolean {
    // Gateway is available if we're in browser (can connect to ZCLAW)
    return typeof window !== 'undefined';
  }

  getProvider(): LLMProvider {
    return 'gateway';
  }
}
// === SaaS Relay Adapter (via SaaS backend) ===
class SaasLLMAdapter implements LLMServiceAdapter {
private config: LLMConfig;
constructor(config: LLMConfig) {
this.config = { ...DEFAULT_CONFIGS.saas, ...config };
}
async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
const config = { ...this.config, ...options };
const startTime = Date.now();
// Dynamic import to avoid circular dependency
const { useSaaSStore } = await import('../store/saasStore');
const { saasUrl, authToken } = useSaaSStore.getState();
if (!saasUrl || !authToken) {
throw new Error('[SaaS] 未登录 SaaS 平台,请先在设置中登录');
}
// Dynamic import of SaaSClient singleton
const { saasClient } = await import('./saas-client');
saasClient.setBaseUrl(saasUrl);
saasClient.setToken(authToken);
const openaiBody = {
model: config.model || 'default',
messages,
max_tokens: config.maxTokens || 4096,
temperature: config.temperature ?? 0.7,
stream: false,
};
const response = await saasClient.chatCompletion(
openaiBody,
AbortSignal.timeout(config.timeout || 300000),
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({
error: 'unknown',
message: `SaaS relay 请求失败 (${response.status})`,
}));
throw new Error(
`[SaaS] ${errorData.message || errorData.error || `请求失败: ${response.status}`}`,
);
}
const data = await response.json();
const latencyMs = Date.now() - startTime;
const result = {
content: data.choices?.[0]?.message?.content || '',
tokensUsed: {
input: data.usage?.prompt_tokens || 0,
output: data.usage?.completion_tokens || 0,
},
model: data.model,
latencyMs,
};
// Record telemetry for SaaS relay usage
try {
const { recordLLMUsage } = await import('./telemetry-collector');
recordLLMUsage(
result.model || 'saas-relay',
result.tokensUsed.input,
result.tokensUsed.output,
{ latencyMs, success: true, connectionMode: 'saas' },
);
} catch (e) {
log.debug('Failed to record LLM telemetry', { error: e });
}
return result;
}
isAvailable(): boolean {
// Check synchronously via localStorage for availability check.
// Auth is cookie-based — check connection mode + URL presence.
try {
const mode = localStorage.getItem('zclaw-connection-mode');
const saasUrl = localStorage.getItem('zclaw-saas-url');
return mode === 'saas' && !!saasUrl;
} catch (e) {
log.debug('Failed to check SaaS adapter availability', { error: e });
return false;
}
}
getProvider(): LLMProvider {
return 'saas';
}
}
// === Factory ===
let cachedAdapter: LLMServiceAdapter | null = null;
export function createLLMAdapter(config?: Partial<LLMConfig>): LLMServiceAdapter {
const savedConfig = loadConfig();
const finalConfig = { ...savedConfig, ...config };
switch (finalConfig.provider) {
case 'openai':
return new OpenAILLMAdapter(finalConfig);
case 'volcengine':
return new VolcengineLLMAdapter(finalConfig);
case 'gateway':
return new GatewayLLMAdapter(finalConfig);
case 'saas':
return new SaasLLMAdapter(finalConfig);
case 'mock':
default:
return new MockLLMAdapter(finalConfig);
}
}
export function getLLMAdapter(): LLMServiceAdapter {
if (!cachedAdapter) {
cachedAdapter = createLLMAdapter();
}
return cachedAdapter;
}
export function resetLLMAdapter(): void {
cachedAdapter = null;
}
// === Config Management ===
/**
 * Load the persisted LLM config from localStorage.
 * Falls back to the gateway default when nothing valid is stored, and to
 * the mock config outside a browser (SSR/tests).
 */
export function loadConfig(): LLMConfig {
  if (typeof window === 'undefined') {
    return DEFAULT_CONFIGS.mock;
  }
  try {
    const saved = localStorage.getItem(LLM_CONFIG_KEY);
    if (saved) {
      // Fix: validate the parsed shape instead of trusting JSON.parse blindly —
      // a corrupt or legacy entry without `provider` would otherwise flow into
      // the factory and misbehave at a distance.
      const parsed: unknown = JSON.parse(saved);
      if (
        parsed !== null &&
        typeof parsed === 'object' &&
        typeof (parsed as { provider?: unknown }).provider === 'string'
      ) {
        return parsed as LLMConfig;
      }
      log.debug('Ignoring persisted LLM config with invalid shape');
    }
  } catch (e) {
    log.debug('Failed to parse LLM config', { error: e });
  }
  // Default to gateway (ZCLAW passthrough) for L4 self-evolution
  return DEFAULT_CONFIGS.gateway;
}

/**
 * Persist the config (apiKey intentionally stripped), flag it dirty for
 * SaaS push sync, and invalidate the cached adapter so the change applies.
 */
export function saveConfig(config: LLMConfig): void {
  if (typeof window === 'undefined') return;
  // Don't save API key to localStorage for security
  const safeConfig = { ...config };
  delete safeConfig.apiKey;
  localStorage.setItem(LLM_CONFIG_KEY, JSON.stringify(safeConfig));
  // Mark config as dirty for SaaS push sync
  localStorage.setItem('zclaw-config-dirty.llm.default', '1');
  resetLLMAdapter();
}
// === Prompt Templates ===
// Hardcoded defaults — last-resort fallback when SaaS OTA prompts are
// unavailable and nothing is cached locally.
// NOTE: the template bodies are runtime prompt text (Chinese) sent to the
// model — they are behavior, not comments; do not reword them casually.
const HARDCODED_PROMPTS: Record<string, { system: string; user: (arg: string) => string }> = {
  // Self-reflection: mine recent conversations for behavior patterns and
  // improvement suggestions, returned as JSON.
  reflection: {
    system: `你是一个 AI Agent 的自我反思引擎。分析最近的对话历史,识别行为模式,并生成改进建议。
输出 JSON 格式:
{
"patterns": [
{
"observation": "观察到的模式描述",
"frequency": 数字,
"sentiment": "positive/negative/neutral",
"evidence": ["证据1", "证据2"]
}
],
"improvements": [
{
"area": "改进领域",
"suggestion": "具体建议",
"priority": "high/medium/low"
}
],
"identityProposals": []
}`,
    user: (context: string) => `分析以下对话历史,进行自我反思:\n\n${context}\n\n请识别行为模式积极和消极并提供具体的改进建议。`,
  },
  // Context compaction: lossy summary that preserves decisions, preferences,
  // open tasks, and chronology.
  compaction: {
    system: `你是一个对话摘要专家。将长对话压缩为简洁的摘要,保留关键信息。
要求:
1. 保留所有重要决策和结论
2. 保留用户偏好和约束
3. 保留未完成的任务
4. 保持时间顺序
5. 摘要应能在后续对话中替代原始内容`,
    user: (messages: string) => `请将以下对话压缩为简洁摘要,保留关键信息:\n\n${messages}`,
  },
  // Memory extraction: pull long-term facts/preferences/lessons/tasks as a
  // JSON array ([] when nothing is worth remembering).
  extraction: {
    system: `你是一个记忆提取专家。从对话中提取值得长期记住的信息。
提取类型:
- fact: 用户告知的事实(如"我的公司叫XXX"
- preference: 用户的偏好(如"我喜欢简洁的回答"
- lesson: 本次对话的经验教训
- task: 未完成的任务或承诺
输出 JSON 数组:
[
{
"content": "记忆内容",
"type": "fact/preference/lesson/task",
"importance": 1-10,
"tags": ["标签1", "标签2"]
}
]`,
    user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容返回空数组 []。`,
  },
};
// === Prompt Cache (SaaS OTA) ===
// localStorage key holding the OTA-synced prompt templates.
const PROMPT_CACHE_KEY = 'zclaw-prompt-cache';
/** One OTA-synced prompt entry as persisted locally. */
interface CachedPrompt {
  name: string;
  // Version number from the SaaS side; compared during OTA update checks.
  version: number;
  // Origin label copied from the SaaS update payload — presumably identifies
  // the upstream channel; TODO(review) confirm against the SaaS schema.
  source: string;
  system: string;
  // null when the prompt has no user-message template (hardcoded fallback applies).
  userTemplate: string | null;
  // ISO-8601 timestamp of the last successful sync.
  syncedAt: string;
}
/** Read the prompt cache persisted in localStorage ({} when unavailable or corrupt). */
function loadPromptCache(): Record<string, CachedPrompt> {
  if (typeof window === 'undefined') return {};
  try {
    const stored = localStorage.getItem(PROMPT_CACHE_KEY);
    if (!stored) return {};
    return JSON.parse(stored);
  } catch (e) {
    log.debug('Failed to parse prompt cache', { error: e });
    return {};
  }
}
/** Persist the prompt cache to localStorage; no-op outside a browser. */
function savePromptCache(cache: Record<string, CachedPrompt>): void {
  if (typeof window !== 'undefined') {
    localStorage.setItem(PROMPT_CACHE_KEY, JSON.stringify(cache));
  }
}
/**
 * Resolve the system prompt for a named template.
 * Precedence: OTA-synced local cache, then hardcoded default, then ''.
 */
export function getSystemPrompt(name: string): string {
  const cached = loadPromptCache()[name];
  return cached?.system || (HARDCODED_PROMPTS[name]?.system ?? '');
}
/**
 * Resolve the user-message template for a named prompt.
 * Precedence: OTA-synced local cache (string), then the hardcoded default
 * (a render function), then null when the name is unknown.
 */
export function getUserPromptTemplate(name: string): string | ((arg: string) => string) | null {
  return loadPromptCache()[name]?.userTemplate || HARDCODED_PROMPTS[name]?.user || null;
}
/** Current locally cached version for a prompt (0 when never synced). */
export function getPromptVersion(name: string): number {
  return loadPromptCache()[name]?.version ?? 0;
}
/** All locally cached prompt versions, keyed by name (for the OTA check). */
export function getAllPromptVersions(): Record<string, number> {
  return Object.fromEntries(
    Object.entries(loadPromptCache()).map(([name, entry]) => [name, entry.version]),
  );
}
/**
 * Merge a batch of SaaS OTA prompt updates into the local cache.
 * @param updates Update list pulled from the SaaS backend.
 * @returns Number of entries written (the cache is only saved when > 0).
 */
export function applyPromptUpdates(updates: Array<{
  name: string;
  version: number;
  system_prompt: string;
  user_prompt_template: string | null;
  source: string;
  changelog?: string | null;
}>): number {
  const cache = loadPromptCache();
  let applied = 0;
  updates.forEach((update) => {
    cache[update.name] = {
      name: update.name,
      version: update.version,
      source: update.source,
      system: update.system_prompt,
      userTemplate: update.user_prompt_template,
      syncedAt: new Date().toISOString(),
    };
    applied += 1;
  });
  if (applied > 0) {
    savePromptCache(cache);
  }
  return applied;
}
/**
 * Background check for SaaS prompt updates (OTA).
 * Runs once at call time and then every 30 minutes. Idempotent: calling it
 * again while a timer is active is a no-op.
 */
let promptSyncTimer: ReturnType<typeof setInterval> | null = null;

export function startPromptOTASync(deviceId: string): void {
  if (promptSyncTimer) return; // already started
  if (typeof window === 'undefined') return;
  const doSync = async () => {
    try {
      // Dynamic imports avoid circular dependencies with the SaaS modules.
      const { saasClient } = await import('./saas-client');
      const { useSaaSStore } = await import('../store/saasStore');
      const { saasUrl, authToken } = useSaaSStore.getState();
      if (!saasUrl || !authToken) return; // not logged in — nothing to sync
      saasClient.setBaseUrl(saasUrl);
      saasClient.setToken(authToken);
      const versions = getAllPromptVersions();
      const result = await saasClient.checkPromptUpdates(deviceId, versions);
      if (result.updates.length > 0) {
        const applied = applyPromptUpdates(result.updates);
        if (applied > 0) {
          log.debug(`已更新 ${applied} 个提示词模板`);
        }
      }
    } catch (err) {
      // Fail silently — OTA sync must never disturb normal use.
      log.debug('检查更新失败:', err);
    }
  };
  // Run once immediately
  doSync();
  // Then re-check every 30 minutes
  promptSyncTimer = setInterval(doSync, 30 * 60 * 1000);
}
/** Cancel the periodic OTA check started by startPromptOTASync (safe to call twice). */
export function stopPromptOTASync(): void {
  if (!promptSyncTimer) return;
  clearInterval(promptSyncTimer);
  promptSyncTimer = null;
}
// Backward-compatible LLM_PROMPTS export (reads go through the prompt cache).
// Getters re-resolve on every access so OTA updates take effect immediately.
// NOTE(review): the `!` assumes a user template always resolves — true for
// these three names thanks to HARDCODED_PROMPTS, which always defines them.
export const LLM_PROMPTS = {
  get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; },
  get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; },
  get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; },
};
// === Telemetry Integration ===
/**
 * Report one finished LLM call to the telemetry collector.
 * Telemetry failures are swallowed (debug-logged) so reporting can never
 * break a caller.
 */
function trackLLMCall(
  adapter: LLMServiceAdapter,
  response: LLMResponse,
  error?: unknown,
): void {
  const provider = adapter.getProvider();
  const usage = response.tokensUsed;
  try {
    recordLLMUsage(response.model || provider, usage?.input ?? 0, usage?.output ?? 0, {
      latencyMs: response.latencyMs,
      success: !error,
      // Truncate to keep telemetry payloads small and avoid leaking details.
      errorType: error instanceof Error ? error.message.slice(0, 80) : undefined,
      connectionMode: provider === 'saas' ? 'saas' : 'tauri',
    });
  } catch (e) {
    log.debug('Telemetry recording failed', { error: e });
  }
}
// === Helper Functions ===
/**
 * Shared driver for the prompt helpers below: resolves the named prompt,
 * renders the user message, calls the LLM, and records telemetry.
 * Also records FAILED calls via trackLLMCall's `error` argument, which the
 * previous per-helper copies never exercised; the error is rethrown.
 */
async function runPromptTask(
  promptName: 'reflection' | 'compaction' | 'extraction',
  input: string,
  adapter?: LLMServiceAdapter,
): Promise<string> {
  const llm = adapter || getLLMAdapter();
  const prompt = LLM_PROMPTS[promptName];
  // Cached templates are plain strings; hardcoded defaults are render functions.
  const userContent = typeof prompt.user === 'function' ? prompt.user(input) : prompt.user;
  try {
    const response = await llm.complete([
      { role: 'system', content: prompt.system },
      { role: 'user', content: userContent },
    ]);
    trackLLMCall(llm, response);
    return response.content;
  } catch (err) {
    trackLLMCall(llm, { content: '' }, err);
    throw err;
  }
}

/** Run the self-reflection prompt over recent conversation context. */
export async function llmReflect(context: string, adapter?: LLMServiceAdapter): Promise<string> {
  return runPromptTask('reflection', context, adapter);
}

/** Compact a long conversation into a summary that can replace it. */
export async function llmCompact(messages: string, adapter?: LLMServiceAdapter): Promise<string> {
  return runPromptTask('compaction', messages, adapter);
}

/** Extract long-term memories (facts/preferences/lessons/tasks) as JSON text. */
export async function llmExtract(
  conversation: string,
  adapter?: LLMServiceAdapter
): Promise<string> {
  return runPromptTask('extraction', conversation, adapter);
}