fix(core,health): 文章内容 sanitize 保留安全 HTML 标签 + 血透测试文章种子

- 新增 sanitize_rich_html()：使用 ammonia 白名单保留安全 HTML 标签和内联样式
- 修复文章创建/更新时 content 被 strip_html_tags() 完全剥离的问题
- ammonia 4 不允许手动指定 <a> 的 rel 属性（自动管理），已从 tag_attrs 移除
- 新增 3 个 sanitize_rich_html 单元测试
- 新增 seed-dialysis-articles.mjs 种子脚本（4 篇血透相关富文本文章）
2026-05-11 03:13:43 +08:00
parent c716cc0f7b
commit e00ee69d28
3 changed files with 375 additions and 3 deletions
--- a/crates/erp-core/src/sanitize.rs
+++ b/crates/erp-core/src/sanitize.rs
@@ -43,6 +43,79 @@ pub fn sanitize_string(input: &str) -> String {
    strip_html_tags(input)
 }

+/// Sanitizes rich-text HTML: keeps an allow-list of formatting tags plus the
+/// inline `style` attribute; anything not allow-listed is stripped by ammonia.
+/// Meant for article bodies and similar content that must keep HTML layout.
+///
+/// NOTE(review): nothing configured here restricts what a `style` value may
+/// contain — confirm whether CSS properties need an allow-list as well.
+pub fn sanitize_rich_html(input: &str) -> String {
+    use std::collections::{HashMap, HashSet};
+    use std::sync::OnceLock;
+
+    // The configured builder owns several hash tables; build it once and share
+    // it across calls instead of reconstructing the allow-lists every time.
+    static CLEANER: OnceLock<ammonia::Builder<'static>> = OnceLock::new();
+    let cleaner = CLEANER.get_or_init(|| {
+        // `style` is granted to every tag via `generic_attributes`, so only the
+        // extra per-tag attributes are listed. `rel` on <a> is left to ammonia.
+        let tag_attrs: HashMap<&str, HashSet<&str>> = HashMap::from([
+            ("div", HashSet::from(["data-w-e-type"])),
+            ("img", HashSet::from(["src", "alt", "width", "height"])),
+            ("a", HashSet::from(["href", "target"])),
+            ("td", HashSet::from(["colspan", "rowspan"])),
+            ("th", HashSet::from(["colspan", "rowspan"])),
+        ]);
+        let mut builder = ammonia::Builder::new();
+        builder
+            .tags(HashSet::from([
+                "p",
+                "br",
+                "span",
+                "div",
+                "strong",
+                "b",
+                "em",
+                "i",
+                "u",
+                "s",
+                "h1",
+                "h2",
+                "h3",
+                "h4",
+                "h5",
+                "h6",
+                "ul",
+                "ol",
+                "li",
+                "blockquote",
+                "pre",
+                "code",
+                "table",
+                "thead",
+                "tbody",
+                "tr",
+                "th",
+                "td",
+                "img",
+                "a",
+                "hr",
+            ]))
+            .tag_attributes(tag_attrs)
+            .generic_attributes(HashSet::from(["style"]))
+            .url_relative(ammonia::UrlRelative::PassThrough);
+        builder
+    });
+    cleaner.clean(input).to_string()
+}
+
+/// Sanitizes an optional rich-text HTML value via [`sanitize_rich_html`].
+/// Yields `None` when the input is `None` or the cleaned text is blank.
+pub fn sanitize_rich_html_option(input: Option<String>) -> Option<String> {
+    let cleaned = sanitize_rich_html(input?.as_str());
+    (!cleaned.trim().is_empty()).then_some(cleaned)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -108,4 +181,38 @@ mod tests {
        let result = strip_html_tags("a &lt; b");
        assert!(result.contains("a") && result.contains("b"));
    }
+
+    /// Allowed tags and the inline `style` attribute must survive sanitization.
+    #[test]
+    fn rich_html_preserves_safe_tags() {
+        let cleaned = sanitize_rich_html(
+            r#"<p>Hello</p><div style="background:#f0fdf4;padding:14px">Green box</div><strong>Bold</strong>"#,
+        );
+        for (needle, message) in [
+            ("<p>Hello</p>", "should preserve <p> tags"),
+            ("<strong>Bold</strong>", "should preserve <strong>"),
+            ("background", "should preserve style attribute"),
+        ] {
+            assert!(cleaned.contains(needle), "{message}");
+        }
+    }
+
+    /// The output must not mention `script` and must still carry the paragraph text.
+    #[test]
+    fn rich_html_removes_script() {
+        let cleaned = sanitize_rich_html(r#"<p>Hello</p><script>alert(1)</script>"#);
+        assert!(!cleaned.contains("script"), "should remove script tags");
+        assert!(cleaned.contains("Hello"));
+    }
+
+    /// The `data-w-e-type` marker on a styled `<div>` and its text must be kept.
+    #[test]
+    fn rich_html_preserves_styled_block() {
+        const STYLED_BLOCK: &str = r#"<div data-w-e-type="styled-block" style="background:#f0fdf4;border-radius:8px;padding:14px">Tip content</div>"#;
+        let cleaned = sanitize_rich_html(STYLED_BLOCK);
+
+        let keeps_marker = cleaned.contains("styled-block");
+        assert!(keeps_marker, "should preserve data-w-e-type");
+        assert!(cleaned.contains("Tip content"));
+    }
 }
--- a/crates/erp-health/src/dto/article_dto.rs
+++ b/crates/erp-health/src/dto/article_dto.rs
@@ -2,7 +2,9 @@ use serde::{Deserialize, Serialize};
 use utoipa::{IntoParams, ToSchema};
 use uuid::Uuid;

-use erp_core::sanitize::{sanitize_option, sanitize_string, strip_html_tags};
+use erp_core::sanitize::{
+    sanitize_option, sanitize_rich_html_option, sanitize_string, strip_html_tags,
+};

 // ---------------------------------------------------------------------------
 // 文章 DTOs
@@ -92,7 +94,8 @@ impl CreateArticleReq {
    pub fn sanitize(&mut self) {
        self.title = sanitize_string(&self.title);
        self.summary = sanitize_option(self.summary.take());
-        self.content = sanitize_option(self.content.take());
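+        // content: sanitized as rich HTML (safe tags/styles kept). NOTE(review): applied unconditionally here; any per-mode tag stripping must happen elsewhere — confirm.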
+        self.content = sanitize_rich_html_option(self.content.take());
        self.category = sanitize_option(self.category.take());
        self.author = sanitize_option(self.author.take());
        self.slug = sanitize_option(self.slug.take());
@@ -125,7 +128,8 @@ impl UpdateArticleReq {
            *v = strip_html_tags(v);
        }
        self.summary = sanitize_option(self.summary.take());
-        self.content = sanitize_option(self.content.take());
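+        // content: sanitized as rich HTML (safe tags/styles kept). NOTE(review): applied unconditionally here; any per-mode tag stripping must happen elsewhere — confirm.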
+        self.content = sanitize_rich_html_option(self.content.take());
        self.category = sanitize_option(self.category.take());
        self.author = sanitize_option(self.author.take());
        self.slug = sanitize_option(self.slug.take());