// zclaw_openfang/crates/zclaw-growth/benches/retrieval_bench.rs

//! Benchmark for TF-IDF retrieval performance in zclaw-growth
//!
//! Measures:
//! - Indexing throughput (documents/sec)
//! - Query latency at various corpus sizes (10/50/100/500 candidates)
//! - Top-K retrieval latency

use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
};
use zclaw_growth::retrieval::SemanticScorer;
use zclaw_growth::types::{MemoryEntry, MemoryType};

/// Construct one synthetic `MemoryType::Knowledge` entry for `agent`.
///
/// `fact-{idx}` is passed as the third constructor argument and `content` is
/// stored as an owned string. The entry is then tagged with two keywords:
/// `topic` itself and `topic-{idx}`.
fn make_entry(agent: &str, idx: usize, topic: &str, content: &str) -> MemoryEntry {
    let label = format!("fact-{idx}");
    let keywords = vec![topic.to_string(), format!("topic-{idx}")];

    MemoryEntry::new(agent, MemoryType::Knowledge, &label, content.to_string())
        .with_keywords(keywords)
}

/// Create a scorer that has already indexed `size` synthetic entries.
///
/// Entries rotate through ten fixed `(topic, description)` pairs; entry `i`
/// uses pair `i % 10`, embeds `i` in its text, and belongs to `bench-agent`.
/// Returns the populated scorer together with the entries in insertion order.
fn build_corpus(size: usize) -> (SemanticScorer, Vec<MemoryEntry>) {
    const TOPICS: [(&str, &str); 10] = [
        ("rust", "Rust is a systems programming language focused on safety and performance with zero-cost abstractions"),
        ("python", "Python is a high-level general-purpose programming language emphasizing code readability"),
        ("machine-learning", "Machine learning is a subset of artificial intelligence that enables systems to learn from data"),
        ("web-development", "Web development involves building and maintaining websites using frontend and backend technologies"),
        ("database", "Database management systems provide tools for storing retrieving and managing structured data efficiently"),
        ("security", "Cybersecurity involves protecting computer systems and networks from information disclosure"),
        ("devops", "DevOps combines software development and IT operations to shorten the systems development lifecycle"),
        ("testing", "Software testing validates that applications meet their specified requirements and are free of defects"),
        ("api", "API design involves creating interfaces that allow different software applications to communicate"),
        ("cloud", "Cloud computing delivers computing services over the internet including servers storage databases networking"),
    ];

    let mut scorer = SemanticScorer::new();
    let mut entries = Vec::with_capacity(size);

    // `cycle().take(size)` walks the topic table round-robin, which matches
    // indexing it with `i % TOPICS.len()` for `i` in `0..size`.
    for (n, &(tag, blurb)) in TOPICS.iter().cycle().take(size).enumerate() {
        let content = format!(
            "{base_content}. This fact #{i} discusses {topic} in depth with examples and use cases. \
            The key concepts include {topic} patterns, Implementation details cover performance optimization.",
            base_content = blurb,
            i = n,
            topic = tag,
        );
        let entry = make_entry("bench-agent", n, tag, &content);
        scorer.index_entry(&entry);
        entries.push(entry);
    }

    (scorer, entries)
}

/// Produce `count` un-indexed synthetic entries for the indexing benchmark.
///
/// Entry `i` is assigned one of five topics in round-robin order
/// (`i % 5`) and owned by `bench-agent`; no scorer is involved here.
fn build_entries(count: usize) -> Vec<MemoryEntry> {
    const TOPICS: [&str; 5] = ["rust", "python", "ml", "web", "database"];

    let mut entries = Vec::with_capacity(count);
    for n in 0..count {
        let tag = TOPICS[n % TOPICS.len()];
        let text = format!(
            "Fact {} about {}: detailed technical content with multiple keywords and concepts \
            covering advanced patterns, best practices, and optimization strategies.",
            n, tag
        );
        entries.push(make_entry("bench-agent", n, tag, &text));
    }
    entries
}

// ─── Indexing throughput ───

fn bench_indexing(c: &mut Criterion) {
    let mut group = c.benchmark_group("index_entry");
    group.sample_size(50);

    for &batch_size in &[10, 50, 100, 500] {
        let entries = build_entries(batch_size);

        group.bench_with_input(
            BenchmarkId::new("batch", batch_size),
            &entries,
            |b, entries| {
                b.iter(|| {
                    let mut scorer = SemanticScorer::new();
                    for entry in entries {
                        scorer.index_entry(black_box(entry));
                    }
                });
            },
        );
    }
}

// ─── Query scoring latency ───

/// Benchmark a single `score_similarity` call against scorers of growing size.
///
/// For each corpus size (10, 50, 100, 500) a scorer is pre-populated outside
/// the timed section. The timed closure scores one fixed query against the
/// entry at index `corpus_size / 2`, so only the scoring call is measured.
fn bench_query_scoring(c: &mut Criterion) {
    let mut group = c.benchmark_group("score_similarity");

    for &corpus_size in &[10, 50, 100, 500] {
        let (scorer, entries) = build_corpus(corpus_size);
        let query = "rust safety performance optimization";
        let entry = &entries[corpus_size / 2];

        group.bench_with_input(
            BenchmarkId::new("corpus", corpus_size),
            &scorer,
            |b, scorer| {
                // The score is returned from the closure, so Criterion keeps it
                // alive; the inputs are black-boxed to prevent const-folding.
                b.iter(|| scorer.score_similarity(black_box(query), black_box(entry)));
            },
        );
    }

    // Explicitly close the group so its summary is emitted here rather than
    // implicitly when the value is dropped.
    group.finish();
}

// ─── Top-K retrieval ───

fn bench_top_k_retrieval(c: &mut Criterion) {
    let mut group = c.benchmark_group("top_k_retrieval");

    for &corpus_size in &[10, 50, 100, 500] {
        let (scorer, entries) = build_corpus(corpus_size);
        let query = "machine learning model training optimization";

        group.bench_with_input(
            BenchmarkId::new("top3", corpus_size),
            &entries,
            |b, entries| {
                b.iter(|| {
                    let mut scored: Vec<(f32, usize)> = entries
                        .iter()
                        .enumerate()
                        .map(|(idx, entry)| (scorer.score_similarity(query, entry), idx))
                        .collect();
                    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
                    let _top3 = scored.into_iter().take(3).collect::<Vec<_>>();
                });
            },
        );
    }
}

criterion_group!(
    benches,
    bench_indexing,
    bench_query_scoring,
    bench_top_k_retrieval,
);

criterion_main!(benches);