feat(saas): add knowledge base module — categories, items, versions, search, analytics

- 5 knowledge tables (categories, items, chunks, versions, usage) with pgvector + HNSW + GIN indexes
- 23+ API routes covering full CRUD, tree-structured categories, version snapshots
- Keyword-based search with ILIKE + array match (placeholder for vector search)
- Analytics endpoints: overview, trends, top-items, quality, gaps
- Markdown-aware content chunking with overlap strategy
- Worker dispatch for async embedding generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-02 00:21:28 +08:00
parent b66087de0e
commit ef60f9a183
8 changed files with 1194 additions and 1 deletions

View File

@@ -0,0 +1,109 @@
-- Migration: Knowledge Base tables with pgvector support
-- 5 tables: knowledge_categories, knowledge_items, knowledge_chunks,
-- knowledge_versions, knowledge_usage
-- Enable the pgvector extension. It supplies the `vector` column type used by
-- knowledge_chunks.embedding and the `hnsw` index method / `vector_cosine_ops`
-- operator class used by idx_kchunks_embedding. IF NOT EXISTS keeps the
-- statement safe to re-run.
CREATE EXTENSION IF NOT EXISTS vector;
-- Industry category tree.
-- Rows form a hierarchy through the self-referencing parent_id column.
-- parent_id is nullable (presumably NULL marks a root category; confirm
-- against the application code).
CREATE TABLE IF NOT EXISTS knowledge_categories (
    id          TEXT                     PRIMARY KEY,
    name        CHARACTER VARYING(100)   NOT NULL,
    description TEXT,
    parent_id   TEXT,
    icon        CHARACTER VARYING(50),
    sort_order  INTEGER                  DEFAULT 0,
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    -- Self-reference: a parent must itself be an existing category.
    FOREIGN KEY (parent_id) REFERENCES knowledge_categories (id),

    -- A category may not be its own parent. Longer cycles (A -> B -> A) are
    -- not prevented by this constraint.
    CHECK (id <> parent_id)
);

-- Speeds up "children of category X" lookups when walking the tree.
CREATE INDEX IF NOT EXISTS idx_kc_parent
    ON knowledge_categories USING btree (parent_id);
-- Knowledge items.
-- One row per knowledge entry. Every item belongs to exactly one category and
-- records the account that created it.
CREATE TABLE IF NOT EXISTS knowledge_items (
    id                TEXT                     PRIMARY KEY,
    category_id       TEXT                     NOT NULL,
    title             CHARACTER VARYING(255)   NOT NULL,
    content           TEXT                     NOT NULL,
    keywords          TEXT[]                   DEFAULT '{}',
    related_questions TEXT[]                   DEFAULT '{}',
    priority          INTEGER                  DEFAULT 0,
    status            CHARACTER VARYING(20)    DEFAULT 'active',
    version           INTEGER                  DEFAULT 1,
    source            CHARACTER VARYING(50)    DEFAULT 'manual',
    tags              TEXT[]                   DEFAULT '{}',
    created_by        TEXT                     NOT NULL,
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    -- No ON DELETE action is given, so a category or account that still has
    -- items cannot be deleted.
    FOREIGN KEY (category_id) REFERENCES knowledge_categories (id),
    FOREIGN KEY (created_by)  REFERENCES accounts (id),

    -- Lifecycle states accepted for an item.
    CHECK (status IN ('active', 'archived', 'deprecated')),

    -- Hard cap on content size: at most 100000 characters.
    CHECK (length(content) <= 100000)
);

-- Listing the items of one category.
CREATE INDEX IF NOT EXISTS idx_ki_category
    ON knowledge_items USING btree (category_id);

-- GIN index so array operators on keywords can use an index.
CREATE INDEX IF NOT EXISTS idx_ki_keywords
    ON knowledge_items USING gin (keywords);
-- Knowledge chunks (the core of RAG retrieval).
-- Each row is one chunk of an item's content, positioned by chunk_index, with
-- an optional 1536-dimension embedding. Chunks are removed automatically when
-- their parent item is deleted.
CREATE TABLE IF NOT EXISTS knowledge_chunks (
    id          TEXT                     PRIMARY KEY,
    item_id     TEXT                     NOT NULL,
    chunk_index INTEGER                  NOT NULL,
    content     TEXT                     NOT NULL,
    embedding   vector(1536),            -- nullable: a chunk row can exist without an embedding
    keywords    TEXT[]                   DEFAULT '{}',
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    FOREIGN KEY (item_id) REFERENCES knowledge_items (id) ON DELETE CASCADE
);

-- Fetching all chunks that belong to one item.
CREATE INDEX IF NOT EXISTS idx_kchunks_item
    ON knowledge_chunks USING btree (item_id);

-- GIN index so array operators on chunk keywords can use an index.
CREATE INDEX IF NOT EXISTS idx_kchunks_keywords
    ON knowledge_chunks USING gin (keywords);

-- Vector similarity index (HNSW; does not need pre-populated data).
-- Creating this index only after data has been loaded can improve
-- performance; it is deliberately pre-created here.
CREATE INDEX IF NOT EXISTS idx_kchunks_embedding
    ON knowledge_chunks
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
-- Version snapshots.
-- Each row stores a copy of an item's title, content, keywords and related
-- questions at a given version number, plus who created the snapshot.
-- Snapshots are removed automatically when their item is deleted.
CREATE TABLE IF NOT EXISTS knowledge_versions (
    id                TEXT                     PRIMARY KEY,
    item_id           TEXT                     NOT NULL,
    version           INTEGER                  NOT NULL,
    title             CHARACTER VARYING(255)   NOT NULL,
    content           TEXT                     NOT NULL,
    keywords          TEXT[]                   DEFAULT '{}',
    related_questions TEXT[]                   DEFAULT '{}',
    change_summary    TEXT,
    created_by        TEXT                     NOT NULL,
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    FOREIGN KEY (item_id)    REFERENCES knowledge_items (id) ON DELETE CASCADE,
    -- No ON DELETE action: an account that authored a snapshot cannot be deleted.
    FOREIGN KEY (created_by) REFERENCES accounts (id)
);

-- Listing the version history of one item.
CREATE INDEX IF NOT EXISTS idx_kv_item
    ON knowledge_versions USING btree (item_id);
-- Usage tracking.
-- One row per recorded use of a knowledge item, optionally tied to the
-- specific chunk involved, together with the session, the query text, a
-- relevance score, whether the content was injected, and optional feedback.
--
-- Foreign-key delete behaviour:
--   * item_id  -> ON DELETE CASCADE : knowledge_chunks and knowledge_versions
--     already cascade from knowledge_items; without a matching action here a
--     single usage row would block deletion of its item.
--   * chunk_id -> ON DELETE SET NULL: the usage row is kept (it still points at
--     the item) when the referenced chunk is deleted, instead of the delete
--     failing with a foreign-key violation.
--
-- NOTE: CREATE TABLE IF NOT EXISTS does not alter an existing table. A database
-- that already ran the earlier definition needs a follow-up ALTER TABLE to pick
-- up these ON DELETE actions.
CREATE TABLE IF NOT EXISTS knowledge_usage (
    id              TEXT PRIMARY KEY,
    item_id         TEXT NOT NULL REFERENCES knowledge_items(id) ON DELETE CASCADE,
    chunk_id        TEXT REFERENCES knowledge_chunks(id) ON DELETE SET NULL,
    session_id      VARCHAR(100),
    query_text      TEXT,
    relevance_score FLOAT,
    was_injected    BOOLEAN DEFAULT FALSE,
    -- NULL means no feedback was recorded; otherwise only these two values.
    agent_feedback  VARCHAR(20) CHECK (agent_feedback IN ('positive', 'negative')),
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

-- Per-item usage lookups; also used by the item_id cascade.
CREATE INDEX IF NOT EXISTS idx_ku_item ON knowledge_usage(item_id);

-- Lets the ON DELETE SET NULL action locate referencing rows without a full
-- table scan each time a chunk is deleted.
CREATE INDEX IF NOT EXISTS idx_ku_chunk ON knowledge_usage(chunk_id);

-- Time-range scans over usage rows.
CREATE INDEX IF NOT EXISTS idx_ku_created ON knowledge_usage(created_at);
-- Permission seed data.
-- Appends the knowledge-base permissions to the built-in roles. The column
-- roles.permissions is handled as a JSON array serialized to text (the string
-- functions and LIKE used here only work on a text-typed column).
--
-- Two cases are handled explicitly:
--   * An empty array ("[]", optionally with whitespace) is replaced wholesale,
--     because appending ', "x"' before the bracket would yield the invalid
--     JSON '[, "x"]'.
--   * Otherwise only the final closing bracket is rewritten. The pattern is
--     anchored with $ so a ']' character appearing earlier in the string is
--     left untouched.
--
-- The NOT LIKE guard makes each statement a no-op once the permissions are
-- present, so the migration can be re-run. Rows with NULL permissions are not
-- matched by the guard and stay unchanged.

-- super_admin: read, write, admin and search.
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\s*\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        )
    END
WHERE name = 'super_admin'
  AND permissions NOT LIKE '%knowledge:read%';

-- admin: read, write and search (no knowledge:admin).
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\s*\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:search"]'
        )
    END
WHERE name = 'admin'
  AND permissions NOT LIKE '%knowledge:read%';