fix(core,health): 文章内容 sanitize 保留安全 HTML 标签 + 血透测试文章种子

- 新增 sanitize_rich_html() 使用 ammonia 白名单保留安全 HTML 标签和内联样式
- 修复文章创建/更新时 content 被 strip_html_tags() 完全剥离的问题
- ammonia 4 不允许手动指定 <a> 的 rel 属性(自动管理),已从 tag_attrs 移除
- 新增 3 个 sanitize_rich_html 单元测试
- 新增 seed-dialysis-articles.mjs 种子脚本(4 篇血透相关富文本文章)
This commit is contained in:
iven
2026-05-11 03:13:43 +08:00
parent c716cc0f7b
commit e00ee69d28
3 changed files with 375 additions and 3 deletions

View File

@@ -43,6 +43,79 @@ pub fn sanitize_string(input: &str) -> String {
strip_html_tags(input)
}
/// Sanitizes rich-text HTML while keeping a whitelist of safe tags and attributes.
///
/// Intended for content such as article bodies, where HTML layout has to survive
/// sanitization. Only the tags and attributes listed below are kept; everything
/// else is handled by `ammonia` according to this configuration.
///
/// The whitelist is constant, so the configured `ammonia::Builder` is created on
/// the first call and reused by every later call.
///
/// NOTE(review): `style` is allowed on every tag and this configuration does not
/// restrict which CSS properties may appear in it — confirm that is acceptable
/// for the authors who can submit this content.
///
/// # Parameters
/// * `input` - the HTML fragment to clean.
///
/// # Returns
/// The cleaned HTML serialized back into a `String`.
pub fn sanitize_rich_html(input: &str) -> String {
    use std::collections::{HashMap, HashSet};
    use std::sync::OnceLock;

    // Built lazily once; `clean` only needs a shared reference afterwards.
    static CLEANER: OnceLock<ammonia::Builder<'static>> = OnceLock::new();

    let cleaner = CLEANER.get_or_init(|| {
        // Structural and inline formatting tags that are allowed to remain.
        let allowed_tags: HashSet<&'static str> = HashSet::from([
            "p",
            "br",
            "span",
            "div",
            "strong",
            "b",
            "em",
            "i",
            "u",
            "s",
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "ul",
            "ol",
            "li",
            "blockquote",
            "pre",
            "code",
            "table",
            "thead",
            "tbody",
            "tr",
            "th",
            "td",
            "img",
            "a",
            "hr",
        ]);

        // Per-tag attribute whitelist. `rel` is deliberately absent from `a`:
        // ammonia manages that attribute itself and rejects a manual entry.
        let tag_attrs: HashMap<&'static str, HashSet<&'static str>> = HashMap::from([
            ("div", HashSet::from(["style", "data-w-e-type"])),
            ("span", HashSet::from(["style"])),
            ("p", HashSet::from(["style"])),
            (
                "img",
                HashSet::from(["src", "alt", "style", "width", "height"]),
            ),
            ("a", HashSet::from(["href", "target"])),
            ("td", HashSet::from(["style", "colspan", "rowspan"])),
            ("th", HashSet::from(["style", "colspan", "rowspan"])),
            ("blockquote", HashSet::from(["style"])),
        ]);

        let mut builder = ammonia::Builder::new();
        builder
            .tags(allowed_tags)
            .tag_attributes(tag_attrs)
            // `style` is additionally accepted on every allowed tag.
            .generic_attributes(HashSet::from(["style"]))
            // Relative URLs are left untouched.
            .url_relative(ammonia::UrlRelative::PassThrough);
        builder
    });

    cleaner.clean(input).to_string()
}
/// Applies [`sanitize_rich_html`] to an optional rich-text value.
///
/// Returns `None` when the input is `None`, or when the sanitized result is
/// empty or consists only of whitespace; otherwise returns the sanitized HTML.
pub fn sanitize_rich_html_option(input: Option<String>) -> Option<String> {
    let raw = input?;
    let cleaned = sanitize_rich_html(raw.as_str());
    if cleaned.trim().is_empty() {
        None
    } else {
        Some(cleaned)
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -108,4 +181,38 @@ mod tests {
let result = strip_html_tags("a &lt; b");
assert!(result.contains("a") && result.contains("b"));
}
#[test]
fn rich_html_preserves_safe_tags() {
    // A paragraph, a styled div and bold text: all of it is whitelisted markup.
    let input = r#"<p>Hello</p><div style="background:#f0fdf4;padding:14px">Green box</div><strong>Bold</strong>"#;
    let cleaned = sanitize_rich_html(input);

    // Each pair is (fragment that must survive, message shown on failure).
    let expectations = [
        ("<p>Hello</p>", "should preserve <p> tags"),
        ("<strong>Bold</strong>", "should preserve <strong>"),
        ("background", "should preserve style attribute"),
    ];
    for (needle, message) in expectations {
        assert!(cleaned.contains(needle), "{}", message);
    }
}
#[test]
fn rich_html_removes_script() {
    // A script element next to harmless content.
    let cleaned = sanitize_rich_html(r#"<p>Hello</p><script>alert(1)</script>"#);

    // No trace of the script element may remain in the output.
    assert!(cleaned.find("script").is_none(), "should remove script tags");
    // The harmless paragraph text must still be present.
    assert!(cleaned.contains("Hello"));
}
#[test]
fn rich_html_preserves_styled_block() {
    // A div carrying the editor's `data-w-e-type` marker plus inline styling.
    let input = r#"<div data-w-e-type="styled-block" style="background:#f0fdf4;border-radius:8px;padding:14px">Tip content</div>"#;
    let cleaned = sanitize_rich_html(input);

    let keeps_marker = cleaned.contains("styled-block");
    assert!(keeps_marker, "should preserve data-w-e-type");

    let keeps_text = cleaned.contains("Tip content");
    assert!(keeps_text);
}
}

View File

@@ -2,7 +2,9 @@ use serde::{Deserialize, Serialize};
use utoipa::{IntoParams, ToSchema};
use uuid::Uuid;
use erp_core::sanitize::{sanitize_option, sanitize_string, strip_html_tags};
use erp_core::sanitize::{
sanitize_option, sanitize_rich_html_option, sanitize_string, strip_html_tags,
};
// ---------------------------------------------------------------------------
// 文章 DTOs
@@ -92,7 +94,8 @@ impl CreateArticleReq {
pub fn sanitize(&mut self) {
self.title = sanitize_string(&self.title);
self.summary = sanitize_option(self.summary.take());
self.content = sanitize_option(self.content.take());
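// content: rich-text HTML is preserved and only safety-sanitized. NOTE(review): the original note says other modes strip tags, but no mode check is visible here — confirm.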
self.content = sanitize_rich_html_option(self.content.take());
self.category = sanitize_option(self.category.take());
self.author = sanitize_option(self.author.take());
self.slug = sanitize_option(self.slug.take());
@@ -125,7 +128,8 @@ impl UpdateArticleReq {
*v = strip_html_tags(v);
}
self.summary = sanitize_option(self.summary.take());
self.content = sanitize_option(self.content.take());
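// content: rich-text HTML is preserved and only safety-sanitized. NOTE(review): the original note says other modes strip tags, but no mode check is visible here — confirm.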
self.content = sanitize_rich_html_option(self.content.take());
self.category = sanitize_option(self.category.take());
self.author = sanitize_option(self.author.take());
self.slug = sanitize_option(self.slug.take());