feat(saas): add knowledge base module — categories, items, versions, search, analytics
- 5 knowledge tables (categories, items, chunks, versions, usage) with pgvector + HNSW + GIN indexes
- 23+ API routes covering full CRUD, tree-structured categories, version snapshots
- Keyword-based search with ILIKE + array match (placeholder for vector search)
- Analytics endpoints: overview, trends, top-items, quality, gaps
- Markdown-aware content chunking with overlap strategy
- Worker dispatch for async embedding generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
109
crates/zclaw-saas/migrations/20260402000002_knowledge_base.sql
Normal file
109
crates/zclaw-saas/migrations/20260402000002_knowledge_base.sql
Normal file
@@ -0,0 +1,109 @@
|
||||
-- Migration: Knowledge Base tables with pgvector support
|
||||
-- 5 tables: knowledge_categories, knowledge_items, knowledge_chunks,
|
||||
-- knowledge_versions, knowledge_usage
|
||||
|
||||
-- pgvector provides the `vector` column type and the `hnsw` index method
-- used by knowledge_chunks in this migration. IF NOT EXISTS keeps the
-- statement safe to re-run.
CREATE EXTENSION IF NOT EXISTS
    vector;
|
||||
|
||||
-- Industry category tree.
-- Each row may reference another row of the same table through parent_id;
-- parent_id is nullable (presumably NULL marks a root category — confirm
-- against the application code).
CREATE TABLE IF NOT EXISTS knowledge_categories (
    id          TEXT                    PRIMARY KEY,
    name        CHARACTER VARYING(100)  NOT NULL,
    description TEXT,
    parent_id   TEXT                    REFERENCES knowledge_categories (id),
    icon        CHARACTER VARYING(50),
    sort_order  INTEGER                 DEFAULT 0,
    created_at  TIMESTAMPTZ             DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMPTZ             DEFAULT CURRENT_TIMESTAMP,
    -- A category may not be its own parent. A NULL parent_id makes the
    -- comparison NULL, which a CHECK constraint accepts.
    CHECK (id <> parent_id)
);

-- Index for lookups by parent category.
CREATE INDEX IF NOT EXISTS idx_kc_parent
    ON knowledge_categories (parent_id);
|
||||
|
||||
-- Knowledge items.
-- Every item belongs to exactly one category and records the account that
-- created it. Neither foreign key declares an ON DELETE action, so a
-- referenced category or account cannot be removed while items point at it.
CREATE TABLE IF NOT EXISTS knowledge_items (
    id                TEXT                    PRIMARY KEY,
    category_id       TEXT                    NOT NULL
                                              REFERENCES knowledge_categories (id),
    title             CHARACTER VARYING(255)  NOT NULL,
    content           TEXT                    NOT NULL,
    keywords          TEXT[]                  DEFAULT '{}',
    related_questions TEXT[]                  DEFAULT '{}',
    priority          INTEGER                 DEFAULT 0,
    -- Status is restricted to the three values listed in the CHECK.
    status            CHARACTER VARYING(20)   DEFAULT 'active'
                                              CHECK (status IN ('active', 'archived', 'deprecated')),
    version           INTEGER                 DEFAULT 1,
    source            CHARACTER VARYING(50)   DEFAULT 'manual',
    tags              TEXT[]                  DEFAULT '{}',
    created_by        TEXT                    NOT NULL
                                              REFERENCES accounts (id),
    created_at        TIMESTAMPTZ             DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMPTZ             DEFAULT CURRENT_TIMESTAMP,
    -- Content is capped at 100000 characters.
    CHECK (char_length(content) <= 100000)
);

-- B-tree index for listing items by category.
CREATE INDEX IF NOT EXISTS idx_ki_category
    ON knowledge_items (category_id);

-- GIN index so array operators on keywords can use an index.
CREATE INDEX IF NOT EXISTS idx_ki_keywords
    ON knowledge_items
    USING GIN (keywords);
|
||||
|
||||
-- Knowledge chunks (the core of RAG retrieval).
-- Each chunk is a slice of one knowledge item and is deleted together with
-- that item (ON DELETE CASCADE). embedding is nullable, so a chunk row can
-- exist before its vector has been written.
CREATE TABLE IF NOT EXISTS knowledge_chunks (
    id          TEXT          PRIMARY KEY,
    item_id     TEXT          NOT NULL
                              REFERENCES knowledge_items (id) ON DELETE CASCADE,
    chunk_index INTEGER       NOT NULL,
    content     TEXT          NOT NULL,
    embedding   vector(1536),
    keywords    TEXT[]        DEFAULT '{}',
    created_at  TIMESTAMPTZ   DEFAULT CURRENT_TIMESTAMP
);

-- B-tree index for fetching all chunks of an item.
CREATE INDEX IF NOT EXISTS idx_kchunks_item
    ON knowledge_chunks (item_id);

-- GIN index so array operators on keywords can use an index.
CREATE INDEX IF NOT EXISTS idx_kchunks_keywords
    ON knowledge_chunks
    USING GIN (keywords);

-- Vector similarity index (HNSW, cosine distance). HNSW does not need
-- pre-populated data, so the index can be created on the empty table.
-- Building it only after data has been loaded can be faster; it is
-- nevertheless created up front here.
CREATE INDEX IF NOT EXISTS idx_kchunks_embedding
    ON knowledge_chunks
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- Version snapshots.
-- Stores a copy of an item's title, content, keywords and related questions
-- under a version number, plus who created the snapshot. Rows are removed
-- together with their item (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS knowledge_versions (
    id                TEXT                    PRIMARY KEY,
    item_id           TEXT                    NOT NULL
                                              REFERENCES knowledge_items (id) ON DELETE CASCADE,
    version           INTEGER                 NOT NULL,
    title             CHARACTER VARYING(255)  NOT NULL,
    content           TEXT                    NOT NULL,
    keywords          TEXT[]                  DEFAULT '{}',
    related_questions TEXT[]                  DEFAULT '{}',
    change_summary    TEXT,
    created_by        TEXT                    NOT NULL
                                              REFERENCES accounts (id),
    created_at        TIMESTAMPTZ             DEFAULT CURRENT_TIMESTAMP
);

-- B-tree index for listing the snapshots of an item.
-- NOTE(review): nothing here enforces uniqueness of (item_id, version) —
-- confirm whether the application guarantees it.
CREATE INDEX IF NOT EXISTS idx_kv_item
    ON knowledge_versions (item_id);
|
||||
|
||||
-- Usage tracking: one row per recorded use of a knowledge item, optionally
-- tied to the specific chunk that was used.
--
-- Delete behaviour is declared explicitly so that removing content never
-- fails because of leftover usage rows:
--   * item_id  -> ON DELETE CASCADE, matching knowledge_chunks and
--                 knowledge_versions; usage rows go away with their item.
--   * chunk_id -> ON DELETE SET NULL; chunks are cascade-deleted with their
--                 item, and the default NO ACTION would abort that delete
--                 whenever a usage row still pointed at the chunk. The column
--                 is nullable, so the usage row survives without the chunk
--                 link.
CREATE TABLE IF NOT EXISTS knowledge_usage (
    id              TEXT          PRIMARY KEY,
    item_id         TEXT          NOT NULL
                                  REFERENCES knowledge_items (id) ON DELETE CASCADE,
    chunk_id        TEXT          REFERENCES knowledge_chunks (id) ON DELETE SET NULL,
    session_id      VARCHAR(100),
    query_text      TEXT,
    relevance_score FLOAT,
    was_injected    BOOLEAN       DEFAULT FALSE,
    -- NULL means no feedback was recorded; otherwise one of the two values.
    agent_feedback  VARCHAR(20)   CHECK (agent_feedback IN ('positive', 'negative')),
    created_at      TIMESTAMPTZ   DEFAULT NOW()
);

-- B-tree index for per-item usage lookups.
CREATE INDEX IF NOT EXISTS idx_ku_item ON knowledge_usage (item_id);

-- B-tree index for time-range queries over usage rows.
CREATE INDEX IF NOT EXISTS idx_ku_created ON knowledge_usage (created_at);
|
||||
|
||||
-- Permission seed data: grant the knowledge-base permissions to super_admin.
--
-- roles.permissions appears to hold a JSON array serialized as text, so the
-- new entries are spliced in before the closing bracket.
--   * Only the final ']' is rewritten. A plain REPLACE would rewrite every
--     ']' occurring anywhere in the value.
--   * An empty array ("[]") is handled separately; splicing into it would
--     produce '[, "knowledge:read", ...]', which is not valid JSON.
--   * The NOT LIKE guard skips rows that already carry the permissions, so
--     the statement can be re-run. Rows with NULL permissions are not touched.
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        )
    END
WHERE name = 'super_admin'
  AND permissions NOT LIKE '%knowledge:read%';
|
||||
|
||||
-- Grant the knowledge-base read/write/search permissions to the admin role
-- (no "knowledge:admin" for this role).
--
-- roles.permissions appears to hold a JSON array serialized as text, so the
-- new entries are spliced in before the closing bracket.
--   * Only the final ']' is rewritten. A plain REPLACE would rewrite every
--     ']' occurring anywhere in the value.
--   * An empty array ("[]") is handled separately; splicing into it would
--     produce '[, "knowledge:read", ...]', which is not valid JSON.
--   * The NOT LIKE guard skips rows that already carry the permissions, so
--     the statement can be re-run. Rows with NULL permissions are not touched.
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:search"]'
        )
    END
WHERE name = 'admin'
  AND permissions NOT LIKE '%knowledge:read%';
|
||||
Reference in New Issue
Block a user