- 5 knowledge tables (categories, items, chunks, versions, usage) with pgvector + HNSW + GIN indexes
- 23+ API routes covering full CRUD, tree-structured categories, version snapshots
- Keyword-based search with ILIKE + array match (placeholder for vector search)
- Analytics endpoints: overview, trends, top-items, quality, gaps
- Markdown-aware content chunking with overlap strategy
- Worker dispatch for async embedding generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
110 lines
3.8 KiB
SQL
110 lines
3.8 KiB
SQL
-- Migration: Knowledge Base tables with pgvector support
|
||
-- 5 tables: knowledge_categories, knowledge_items, knowledge_chunks,
|
||
-- knowledge_versions, knowledge_usage
|
||
|
||
-- pgvector provides the vector(n) column type and the hnsw index method that
-- this migration uses for knowledge_chunks.embedding.
CREATE EXTENSION IF NOT EXISTS vector;
|
||
|
||
-- Industry category tree: a category may reference another row of the same
-- table as its parent.
CREATE TABLE IF NOT EXISTS knowledge_categories (
    id          TEXT         PRIMARY KEY,
    name        VARCHAR(100) NOT NULL,
    description TEXT,
    -- Self-reference; nullable, so a category may have no parent.
    parent_id   TEXT         REFERENCES knowledge_categories (id),
    icon        VARCHAR(50),
    sort_order  INT          DEFAULT 0,
    created_at  TIMESTAMPTZ  DEFAULT NOW(),
    updated_at  TIMESTAMPTZ  DEFAULT NOW(),
    -- A category cannot be its own parent. Longer cycles (A -> B -> A) are
    -- not detected by this check.
    CHECK (id <> parent_id)
);

-- Supports looking up the children of a category.
CREATE INDEX IF NOT EXISTS idx_kc_parent
    ON knowledge_categories (parent_id);
|
||
|
||
-- Knowledge items: one row per entry, each filed under exactly one category.
CREATE TABLE IF NOT EXISTS knowledge_items (
    id                TEXT         PRIMARY KEY,
    category_id       TEXT         NOT NULL REFERENCES knowledge_categories (id),
    title             VARCHAR(255) NOT NULL,
    content           TEXT         NOT NULL,
    keywords          TEXT[]       DEFAULT '{}',
    related_questions TEXT[]       DEFAULT '{}',
    priority          INT          DEFAULT 0,
    -- Nullable; when set, it must be one of the three listed values.
    status            VARCHAR(20)  DEFAULT 'active'
                      CHECK (status IN ('active', 'archived', 'deprecated')),
    version           INT          DEFAULT 1,
    source            VARCHAR(50)  DEFAULT 'manual',
    tags              TEXT[]       DEFAULT '{}',
    -- accounts is not created in the visible part of this migration; it must
    -- already exist when this statement runs.
    created_by        TEXT         NOT NULL REFERENCES accounts (id),
    created_at        TIMESTAMPTZ  DEFAULT NOW(),
    updated_at        TIMESTAMPTZ  DEFAULT NOW(),
    -- Caps content at 100000 characters.
    CHECK (length(content) <= 100000)
);

-- Supports filtering items by category.
CREATE INDEX IF NOT EXISTS idx_ki_category
    ON knowledge_items (category_id);

-- GIN index for array operators on the keywords column.
CREATE INDEX IF NOT EXISTS idx_ki_keywords
    ON knowledge_items USING GIN (keywords);
|
||
|
||
-- Knowledge chunks (the core of RAG retrieval): pieces of an item's content,
-- each with an optional embedding.
CREATE TABLE IF NOT EXISTS knowledge_chunks (
    id          TEXT         PRIMARY KEY,
    -- Chunks are removed together with their parent item.
    item_id     TEXT         NOT NULL REFERENCES knowledge_items (id) ON DELETE CASCADE,
    chunk_index INT          NOT NULL,
    content     TEXT         NOT NULL,
    -- Nullable 1536-dimension vector; a chunk row can exist before its
    -- embedding has been stored.
    embedding   vector(1536),
    keywords    TEXT[]       DEFAULT '{}',
    created_at  TIMESTAMPTZ  DEFAULT NOW()
);

-- Supports fetching all chunks of an item.
CREATE INDEX IF NOT EXISTS idx_kchunks_item
    ON knowledge_chunks (item_id);

-- GIN index for array operators on the keywords column.
CREATE INDEX IF NOT EXISTS idx_kchunks_keywords
    ON knowledge_chunks USING GIN (keywords);
|
||
|
||
-- Vector similarity index (HNSW; it can be created on an empty table, no
-- pre-populated data is required).
-- Creating the index only after data has been loaded can improve performance;
-- it is nevertheless created up front here.
-- vector_cosine_ops indexes the column for cosine-distance search.
CREATE INDEX IF NOT EXISTS idx_kchunks_embedding
    ON knowledge_chunks
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
|
||
|
||
-- Version snapshots: copies of an item's title, content, keywords and related
-- questions stored under a version number.
CREATE TABLE IF NOT EXISTS knowledge_versions (
    id                TEXT         PRIMARY KEY,
    -- Snapshots are removed together with their parent item.
    item_id           TEXT         NOT NULL REFERENCES knowledge_items (id) ON DELETE CASCADE,
    version           INT          NOT NULL,
    title             VARCHAR(255) NOT NULL,
    content           TEXT         NOT NULL,
    keywords          TEXT[]       DEFAULT '{}',
    related_questions TEXT[]       DEFAULT '{}',
    change_summary    TEXT,
    -- accounts is not created in the visible part of this migration; it must
    -- already exist when this statement runs.
    created_by        TEXT         NOT NULL REFERENCES accounts (id),
    created_at        TIMESTAMPTZ  DEFAULT NOW()
);

-- Supports listing the snapshots of an item.
CREATE INDEX IF NOT EXISTS idx_kv_item
    ON knowledge_versions (item_id);
|
||
|
||
-- Usage tracking: rows that reference a knowledge item and, optionally, one of
-- its chunks, together with the query text, a relevance score, an injection
-- flag and optional agent feedback.
--
-- Both foreign keys declare an explicit delete action. With the default
-- (NO ACTION), deleting a knowledge_items row -- which cascades to
-- knowledge_chunks -- fails with a foreign-key violation as soon as a usage row
-- references the item or one of its chunks.
CREATE TABLE IF NOT EXISTS knowledge_usage (
    id              TEXT         PRIMARY KEY,
    -- Usage rows follow their item, matching knowledge_chunks and
    -- knowledge_versions.
    -- NOTE(review): if usage history must outlive a deleted item, use a soft
    -- delete on knowledge_items instead of relying on this cascade.
    item_id         TEXT         NOT NULL REFERENCES knowledge_items (id) ON DELETE CASCADE,
    -- Keep the usage row when only its chunk row is deleted.
    chunk_id        TEXT         REFERENCES knowledge_chunks (id) ON DELETE SET NULL,
    session_id      VARCHAR(100),
    query_text      TEXT,
    relevance_score FLOAT,
    was_injected    BOOLEAN      DEFAULT FALSE,
    -- NULL means no feedback was recorded.
    agent_feedback  VARCHAR(20)  CHECK (agent_feedback IN ('positive', 'negative')),
    created_at      TIMESTAMPTZ  DEFAULT NOW()
);

-- Supports per-item usage lookups.
CREATE INDEX IF NOT EXISTS idx_ku_item
    ON knowledge_usage (item_id);

-- Supports time-range scans over usage rows.
CREATE INDEX IF NOT EXISTS idx_ku_created
    ON knowledge_usage (created_at);
|
||
|
||
-- Permission seed data: append the knowledge-base permissions to super_admin.
-- NOTE(review): assumes roles.permissions is a TEXT column holding a flat JSON
-- array of strings -- confirm against the roles schema.
--
-- Only the closing bracket at the very end of the value is rewritten, so a ']'
-- that appears earlier in the text is left alone. An empty array ('[]') is
-- handled separately; appending ', "..."' to it would produce '[, "..."]',
-- which is not valid JSON.
-- The regex literals rely on standard_conforming_strings = on (the PostgreSQL
-- default), so backslashes are passed to the regex engine unchanged.
-- The NOT LIKE guard keeps the statement idempotent across re-runs.
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:admin", "knowledge:search"]'
        )
    END
WHERE name = 'super_admin'
  AND permissions NOT LIKE '%knowledge:read%';
|
||
|
||
-- Append the knowledge-base permissions (read, write, search -- no
-- "knowledge:admin") to the admin role.
-- NOTE(review): assumes roles.permissions is a TEXT column holding a flat JSON
-- array of strings -- confirm against the roles schema.
--
-- Only the closing bracket at the very end of the value is rewritten, so a ']'
-- that appears earlier in the text is left alone. An empty array ('[]') is
-- handled separately; appending ', "..."' to it would produce '[, "..."]',
-- which is not valid JSON.
-- The regex literals rely on standard_conforming_strings = on (the PostgreSQL
-- default), so backslashes are passed to the regex engine unchanged.
-- The NOT LIKE guard keeps the statement idempotent across re-runs.
UPDATE roles
SET permissions = CASE
        WHEN permissions ~ '^\s*\[\s*\]\s*$'
            THEN '["knowledge:read", "knowledge:write", "knowledge:search"]'
        ELSE regexp_replace(
            permissions,
            '\]\s*$',
            ', "knowledge:read", "knowledge:write", "knowledge:search"]'
        )
    END
WHERE name = 'admin'
  AND permissions NOT LIKE '%knowledge:read%';
|