Files
zclaw_openfang/crates/zclaw-saas/migrations/20260413000001_knowledge_visibility_structured.sql
iven c3593d3438
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
feat(knowledge): Phase A 知识库可见性隔离 + 结构化数据源 + 蒸馏Worker
- knowledge_items 增加 visibility(public/private) + account_id 字段
- 新建 structured_sources + structured_rows 表 (Excel JSONB 行级存储)
- 结构化数据源 CRUD API (5 路由: list/get/rows/delete/query)
- 安全查询: JSONB GIN 索引 + 可见性过滤 + 行数限制
- 蒸馏 Worker: 复用 Provider Key Pool 调 DeepSeek/Qwen API
- L0 质量过滤: 长度/隐私检测
- create_item 增加 is_admin 参数控制可见性默认值
- generate_embedding: extract_keywords_from_text 改为 pub 复用

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-12 18:36:05 +08:00

78 lines
3.2 KiB
SQL

-- Phase A: 知识库可见性隔离 + 结构化数据源
-- 1. knowledge_items 增加 visibility + account_id (公共/私有隔离)
-- 2. 新建 structured_sources (Excel/CSV 数据源元数据)
-- 3. 新建 structured_rows (行级 JSONB 存储)
-- ============================================================
-- 1. knowledge_items 可见性扩展
-- ============================================================
ALTER TABLE knowledge_items
ADD COLUMN IF NOT EXISTS visibility VARCHAR(20) DEFAULT 'public'
CHECK (visibility IN ('public', 'private'));
ALTER TABLE knowledge_items
ADD COLUMN IF NOT EXISTS account_id TEXT REFERENCES accounts(id);
-- NULL account_id + public = Admin 上传的公共知识
-- 有 account_id + private = 用户私有知识
CREATE INDEX IF NOT EXISTS idx_ki_visibility
ON knowledge_items(visibility, account_id)
WHERE visibility = 'private';
-- ============================================================
-- 2. 结构化数据源 (Excel / CSV)
-- ============================================================
CREATE TABLE IF NOT EXISTS structured_sources (
id TEXT PRIMARY KEY,
account_id TEXT REFERENCES accounts(id), -- NULL=公共 (Admin上传)
title VARCHAR(255) NOT NULL, -- "2026春季面料目录"
description TEXT,
original_file_name VARCHAR(500),
sheet_names TEXT[] DEFAULT '{}', -- 工作表名称列表
row_count INT DEFAULT 0,
column_headers TEXT[] DEFAULT '{}', -- 合并所有列头 (用于搜索发现)
visibility VARCHAR(20) DEFAULT 'public'
CHECK (visibility IN ('public', 'private')),
industry_id TEXT, -- 关联行业 (可选)
status VARCHAR(20) DEFAULT 'active'
CHECK (status IN ('active', 'archived')),
created_by TEXT NOT NULL REFERENCES accounts(id),
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_ss_visibility
ON structured_sources(visibility, account_id)
WHERE visibility = 'private';
CREATE INDEX IF NOT EXISTS idx_ss_industry
ON structured_sources(industry_id)
WHERE industry_id IS NOT NULL;
-- ============================================================
-- 3. 结构化数据行 (Excel 每行一条)
-- ============================================================
CREATE TABLE IF NOT EXISTS structured_rows (
id TEXT PRIMARY KEY,
source_id TEXT NOT NULL REFERENCES structured_sources(id) ON DELETE CASCADE,
sheet_name VARCHAR(255), -- 工作表名称
row_index INT NOT NULL, -- 行号
headers TEXT[] NOT NULL, -- 列头 ["型号","面料","克重","价格"]
row_data JSONB NOT NULL, -- {"型号":"A100","面料":"纯棉","克重":200,"价格":45}
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- JSONB GIN 索引: 支持对 row_data 任意字段精确查询
CREATE INDEX IF NOT EXISTS idx_sr_data
ON structured_rows USING GIN(row_data jsonb_path_ops);
CREATE INDEX IF NOT EXISTS idx_sr_source
ON structured_rows(source_id);
CREATE UNIQUE INDEX IF NOT EXISTS idx_sr_source_row
ON structured_rows(source_id, sheet_name, row_index);