Testing Strategy
Use this when you need to test tools, plugins, state keys, or full agent runs without depending on a live LLM.
Prerequisites
Section titled “Prerequisites”
- awaken crate added to Cargo.toml (with the runtime re-exports)
- tokio with rt and macros features for async tests
- serde_json for constructing tool arguments and assertions
1. Unit testing a Tool
Section titled “1. Unit testing a Tool”
Create a ToolCallContext with a test snapshot, call tool.execute(), and assert on the returned ToolOutput.
use async_trait::async_trait;use serde_json::{Value, json};use awaken::contract::tool::{ Tool, ToolCallContext, ToolDescriptor, ToolError, ToolOutput, ToolResult,};
/// Example tool that greets a user by name.
struct GreetTool;

#[async_trait]
impl Tool for GreetTool {
    /// Returns the static descriptor for this tool.
    fn descriptor(&self) -> ToolDescriptor {
        ToolDescriptor::new("greet", "greet", "Greet a user by name")
    }

    /// Builds a greeting from the string argument `name`.
    ///
    /// Fails with `ToolError::InvalidArguments` when `name` is absent or is
    /// not a string.
    async fn execute(&self, args: Value, _ctx: &ToolCallContext) -> Result<ToolOutput, ToolError> {
        let name = match args["name"].as_str() {
            Some(name) => name,
            None => return Err(ToolError::InvalidArguments("missing 'name'".into())),
        };
        let payload = json!({ "greeting": format!("Hello, {name}!") });
        Ok(ToolResult::success("greet", payload).into())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A string `name` argument produces a successful result with the greeting.
    #[tokio::test]
    async fn greet_tool_returns_greeting() {
        let greeter = GreetTool;
        let ctx = ToolCallContext::test_default();
        let args = json!({"name": "Alice"});

        let output = greeter.execute(args, &ctx).await.unwrap();

        assert!(output.result.is_success());
        assert_eq!(output.result.data["greeting"], "Hello, Alice!");
    }

    /// An empty argument object is rejected as invalid arguments.
    #[tokio::test]
    async fn greet_tool_rejects_missing_name() {
        let greeter = GreetTool;
        let ctx = ToolCallContext::test_default();
        let empty_args = json!({});

        let err = greeter.execute(empty_args, &ctx).await.unwrap_err();

        assert!(matches!(err, ToolError::InvalidArguments(_)));
    }
}
When your tool returns side-effects via ToolOutput::with_command(), assert on the command field:
#[tokio::test]async fn tool_produces_state_command() { let tool = CounterMutationTool { increment: 5 }; let ctx = ToolCallContext::test_default();
let output = tool.execute(json!({}), &ctx).await.unwrap();
assert!(output.result.is_success()); // The command is opaque at this level; integration tests (section 4) // verify that commands are applied correctly to the StateStore. assert!(!output.command.is_empty());}2. Unit testing a Plugin
Section titled “2. Unit testing a Plugin”
Verify that a plugin registers the expected state keys and hooks by creating a PluginRegistrar and calling plugin.register().
use awaken::contract::StateError;use awaken::state::{StateKey, MergeStrategy, StateKeyOptions, StateCommand};use awaken::plugins::{Plugin, PluginDescriptor, PluginRegistrar};use serde::{Serialize, Deserialize};
// -- State key --
/// Marker type for the `test.counter` state key.
struct Counter;

impl StateKey for Counter {
    type Value = usize;
    type Update = usize;

    const KEY: &'static str = "test.counter";
    const MERGE: MergeStrategy = MergeStrategy::Commutative;

    /// Adds `update` onto the stored count in place.
    fn apply(value: &mut Self::Value, update: Self::Update) {
        *value += update;
    }
}
// -- Plugin --
/// Plugin whose only registration is the `Counter` state key.
struct CounterPlugin;

impl Plugin for CounterPlugin {
    /// Identifies the plugin under the name `counter`.
    fn descriptor(&self) -> PluginDescriptor {
        PluginDescriptor { name: "counter" }
    }

    /// Registers `Counter` with default key options, propagating any
    /// registration error to the caller.
    fn register(&self, r: &mut PluginRegistrar) -> Result<(), StateError> {
        let options = StateKeyOptions::default();
        r.register_key::<Counter>(options)?;
        Ok(())
    }
}
#[cfg(test)]mod tests { use super::*; use awaken::state::StateStore;
#[test] fn counter_plugin_registers_key() { let store = StateStore::new(); // install_plugin calls register() internally store.install_plugin(CounterPlugin).unwrap();
// The key should now be readable (returns the default value) let val = store.read::<Counter>().unwrap_or_default(); assert_eq!(val, 0); }
#[test] fn counter_plugin_rejects_double_registration() { let store = StateStore::new(); store.install_plugin(CounterPlugin).unwrap();
// Registering the same key again should fail let result = store.install_plugin(CounterPlugin); assert!(result.is_err()); }}To test a phase hook directly, build a minimal PhaseContext and inspect the returned StateCommand:
/// Runs the hook on a default test context, commits the command it returns,
/// and checks that the audit log is no longer empty.
#[tokio::test]
async fn audit_hook_appends_entry() {
    let audit_hook = AuditHook;
    let ctx = PhaseContext::test_default();

    let command = audit_hook.run(&ctx).await.unwrap();

    // Commit the command to a test store and verify
    let store = StateStore::new();
    store.install_plugin(AuditPlugin).unwrap();
    store.commit(command).unwrap();

    let audit_log = store.read::<AuditLogKey>().unwrap();
    assert!(!audit_log.entries.is_empty());
}
3. Unit testing a StateKey
Section titled “3. Unit testing a StateKey”
Test apply() mutations directly without any runtime overhead:
use awaken::state::{StateKey, MergeStrategy};
/// Marker type for the `test.hit_counter` state key.
struct HitCounter;

impl StateKey for HitCounter {
    type Value = u64;
    type Update = u64;

    const KEY: &'static str = "test.hit_counter";
    const MERGE: MergeStrategy = MergeStrategy::Commutative;

    /// Adds `update` onto the stored hit count in place.
    fn apply(value: &mut Self::Value, update: Self::Update) {
        *value += update;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Successive updates accumulate: 0 + 5 = 5, then 5 + 3 = 8.
    #[test]
    fn apply_increments_counter() {
        let mut hits = u64::default();

        HitCounter::apply(&mut hits, 5);
        assert_eq!(hits, 5);

        HitCounter::apply(&mut hits, 3);
        assert_eq!(hits, 8);
    }

    /// Applying a zero update to the default value leaves it at zero.
    #[test]
    fn apply_from_default_is_identity_free() {
        let mut hits = u64::default();

        HitCounter::apply(&mut hits, 0);

        assert_eq!(hits, 0);
    }
}
For keys with complex value types, test edge cases like empty collections or merge conflicts:
#[test]fn apply_merge_replaces_entries() { let mut log = AuditLog { entries: vec!["old".into()] }; AuditLogKey::apply(&mut log, AuditLog { entries: vec!["new".into()] }); // Exclusive merge replaces the entire value assert_eq!(log.entries, vec!["new"]);}4. Integration testing with a mock LLM
Section titled “4. Integration testing with a mock LLM”
Build a full agent runtime with a scripted LlmExecutor that returns canned responses. This is the primary pattern used in the awaken-runtime integration tests.
use std::sync::{Arc, Mutex};use async_trait::async_trait;use serde_json::json;
use awaken::contract::content::ContentBlock;use awaken::contract::event_sink::VecEventSink;use awaken::contract::executor::{InferenceExecutionError, InferenceRequest, LlmExecutor};use awaken::contract::identity::{RunIdentity, RunOrigin};use awaken::contract::inference::{StopReason, StreamResult};use awaken::contract::message::{Message, ToolCall};use awaken::contract::tool::{ Tool, ToolCallContext, ToolDescriptor, ToolError, ToolOutput, ToolResult,};use awaken::loop_runner::{AgentLoopParams, LoopStatePlugin, build_agent_env, run_agent_loop};use awaken::registry::{AgentResolver, ResolvedAgent};use awaken::state::StateStore;use awaken::phase::PhaseRuntime;use awaken::RuntimeError;
// -- Scripted LLM executor --
struct ScriptedLlm { responses: Mutex<Vec<StreamResult>>,}
impl ScriptedLlm { fn new(responses: Vec<StreamResult>) -> Self { Self { responses: Mutex::new(responses), } }}
#[async_trait]impl LlmExecutor for ScriptedLlm { async fn execute( &self, _request: InferenceRequest, ) -> Result<StreamResult, InferenceExecutionError> { let mut responses = self.responses.lock().unwrap(); if responses.is_empty() { // Fallback: end the conversation Ok(StreamResult { content: vec![ContentBlock::text("Done.")], tool_calls: vec![], usage: None, stop_reason: Some(StopReason::EndTurn), has_incomplete_tool_calls: false, }) } else { Ok(responses.remove(0)) } }
fn name(&self) -> &str { "scripted" }}
// -- Resolver --
/// Resolver that returns a copy of one pre-built agent regardless of the
/// requested id.
struct FixedResolver {
    agent: ResolvedAgent,
}

impl AgentResolver for FixedResolver {
    /// Clones the stored agent and rebuilds its `env` on every call;
    /// `_agent_id` is not consulted.
    fn resolve(&self, _agent_id: &str) -> Result<ResolvedAgent, RuntimeError> {
        let mut resolved = self.agent.clone();
        let env = build_agent_env(&[], &resolved)?;
        resolved.env = env;
        Ok(resolved)
    }
}
// -- Helpers --
/// Builds a scripted response that carries only tool calls and stops with
/// `StopReason::ToolUse`.
fn tool_step(calls: Vec<ToolCall>) -> StreamResult {
    let stop_reason = Some(StopReason::ToolUse);
    StreamResult {
        content: Vec::new(),
        tool_calls: calls,
        usage: None,
        stop_reason,
        has_incomplete_tool_calls: false,
    }
}
/// Builds a scripted response holding a single text block and stopping with
/// `StopReason::EndTurn`.
fn text_step(text: &str) -> StreamResult {
    let block = ContentBlock::text(text);
    let stop_reason = Some(StopReason::EndTurn);
    StreamResult {
        content: vec![block],
        tool_calls: Vec::new(),
        usage: None,
        stop_reason,
        has_incomplete_tool_calls: false,
    }
}
/// Fixed run identity shared by the integration tests, with a
/// `RunOrigin::User` origin and both optional arguments left as `None`.
fn test_identity() -> RunIdentity {
    let origin = RunOrigin::User;
    RunIdentity::new(
        "thread-test".into(),
        None,
        "run-test".into(),
        None,
        "agent".into(),
        origin,
    )
}
// -- Test --
#[tokio::test]async fn tool_call_flow_end_to_end() { // Script: LLM calls get_weather, then responds with text let llm = Arc::new(ScriptedLlm::new(vec![ tool_step(vec![ToolCall::new("c1", "get_weather", json!({"city": "Tokyo"}))]), text_step("The weather in Tokyo is sunny."), ]));
let agent = ResolvedAgent::new("test", "model", "You are helpful.", llm) .with_tool(Arc::new(GetWeatherTool));
let store = StateStore::new(); let runtime = PhaseRuntime::new(store.clone()).unwrap(); store.install_plugin(LoopStatePlugin).unwrap();
let resolver = FixedResolver { agent }; let sink = Arc::new(VecEventSink::new());
let result = run_agent_loop(AgentLoopParams { resolver: &resolver, agent_id: "test", runtime: &runtime, sink: sink.clone(), checkpoint_store: None, messages: vec![Message::user("What's the weather?")], run_identity: test_identity(), cancellation_token: None, decision_rx: None, overrides: None, frontend_tools: Vec::new(), inbox: None, is_continuation: false, }) .await .unwrap();
assert_eq!(result.response, "The weather in Tokyo is sunny."); assert_eq!(result.steps, 2); // tool step + text step}For simpler cases, use the built-in MockLlmExecutor which returns text-only responses:
use awaken::engine::MockLlmExecutor;
// Configure the mock executor with one scripted text response.
let executor = MockLlmExecutor::new().with_responses(vec!["Hello!".into()]);
let llm = Arc::new(executor);
let agent = ResolvedAgent::new("test", "model", "system prompt", llm);
5. Testing event streams
Section titled “5. Testing event streams”
Use VecEventSink to capture all events emitted during a run and assert on their sequence and content.
use awaken::contract::event::AgentEvent;use awaken::contract::event_sink::VecEventSink;use awaken::contract::lifecycle::TerminationReason;
#[tokio::test]async fn events_follow_expected_lifecycle() { // ... set up runtime and run agent (see section 4) ...
let events = sink.take();
// Verify ordering: RunStart -> StepStart -> ... -> StepEnd -> RunFinish assert!(matches!(events.first(), Some(AgentEvent::RunStart { .. }))); assert!(matches!(events.last(), Some(AgentEvent::RunFinish { .. })));
// Count specific event types let step_starts = events.iter().filter(|e| matches!(e, AgentEvent::StepStart { .. })).count(); let step_ends = events.iter().filter(|e| matches!(e, AgentEvent::StepEnd)).count(); assert_eq!(step_starts, step_ends, "every StepStart needs a StepEnd");
// Verify termination reason if let Some(AgentEvent::RunFinish { termination, .. }) = events.last() { assert_eq!(*termination, TerminationReason::NaturalEnd); }
// Check that tool call events appear in the correct order let has_tool_start = events.iter().any(|e| matches!(e, AgentEvent::ToolCallStart { .. })); let has_tool_done = events.iter().any(|e| matches!(e, AgentEvent::ToolCallDone { .. })); if has_tool_start { assert!(has_tool_done, "ToolCallStart without ToolCallDone"); let start_idx = events.iter().position(|e| matches!(e, AgentEvent::ToolCallStart { .. })).unwrap(); let done_idx = events.iter().position(|e| matches!(e, AgentEvent::ToolCallDone { .. })).unwrap(); assert!(start_idx < done_idx); }}A reusable helper for event type extraction (used in the runtime test suite):
/// Maps an event to a short snake_case label for sequence assertions.
///
/// Variants not listed here are all reported as `"other"`.
fn event_type(e: &AgentEvent) -> &'static str {
    let label = match e {
        AgentEvent::RunStart { .. } => "run_start",
        AgentEvent::RunFinish { .. } => "run_finish",
        AgentEvent::StepStart { .. } => "step_start",
        AgentEvent::StepEnd => "step_end",
        AgentEvent::TextDelta { .. } => "text_delta",
        AgentEvent::ToolCallStart { .. } => "tool_call_start",
        AgentEvent::ToolCallDone { .. } => "tool_call_done",
        AgentEvent::InferenceComplete { .. } => "inference_complete",
        AgentEvent::StateSnapshot { .. } => "state_snapshot",
        _ => "other",
    };
    label
}
// Reduce the captured events to their labels, then check the run boundaries.
let labels: Vec<&str> = events.iter().map(event_type).collect();
assert_eq!(labels[0], "run_start");
assert_eq!(*labels.last().unwrap(), "run_finish");
6. Testing with a real LLM (live tests)
Section titled “6. Testing with a real LLM (live tests)”
Keep normal CI offline. Use scripted providers for deterministic e2e coverage, and put real-provider checks behind #[ignore].
The README quickstart path is covered without network access by crates/awaken/tests/readme_quickstart.rs. The live provider smoke test is crates/awaken/tests/readme_live_provider.rs; it exercises GenaiExecutor, AgentRuntimeBuilder, ModelSpec resolution, and run_to_completion against a real provider.
Run live tests with:
# OpenAI-compatible provider
OPENAI_API_KEY=<your-key> LLM_MODEL=gpt-4o-mini \
  cargo test -p awaken --test readme_live_provider -- --ignored
# Custom endpoint (e.g. BigModel)
LLM_BASE_URL=https://open.bigmodel.cn/api/paas/v4/ \
  LLM_API_KEY=<key> \
  LLM_MODEL=GLM-4.7-Flash \
  cargo test -p awaken --test readme_live_provider -- --ignored
See crates/awaken/examples/live_test.rs and crates/awaken/examples/tool_call_live.rs for complete working examples with console output.
Key Files
Section titled “Key Files”
- crates/awaken-runtime-contract/src/contract/tool.rs — Tool trait, ToolCallContext::test_default(), ToolResult, ToolOutput
- crates/awaken-runtime-contract/src/contract/event_sink.rs — VecEventSink
- crates/awaken-runtime/src/engine/mock.rs — MockLlmExecutor
- crates/awaken-runtime/src/state/mod.rs — StateStore, StateCommand
- crates/awaken-runtime/src/loop_runner/mod.rs — run_agent_loop, AgentLoopParams, AgentRunResult
- crates/awaken-runtime/tests/ — integration test suite (event lifecycle, tool side effects)