⚡️ Add fast model support for subagents to reduce API costs

hyperb1iss · hyperb1iss · commit 485a0f903092 · 2025-11-26T01:01:41.000-08:00
Introduce a separate fast_model configuration that subagents and
ParallelAnalyze use instead of the main model. This allows using
cheaper, faster models (e.g., gpt-4o-mini) for bounded tasks while
keeping the primary model for complex analysis.

Changes:
- Add fast_model field to AgentBackend and IrisAgentService
- Wire fast model through agent setup and IrisAgent
- Update ParallelAnalyze to use the fast model and report it through the agent debug output (replacing its tracing calls)
- Fix string truncation to respect UTF-8 character boundaries (byte-index slicing could panic on multi-byte text)
- Add truncate_at_char_boundary() helper to debug.rs and status.rs
diff --git a/src/agents/core.rs b/src/agents/core.rs
@@ -9,19 +9,28 @@ use crate::git::GitRepo;
 #[derive(Debug, Clone)]
 pub struct AgentBackend {
     pub provider_name: String,
+    /// Primary model for complex tasks
     pub model: String,
+    /// Fast model for simple/bounded tasks (subagents, parsing, etc.)
+    pub fast_model: String,
 }
 
 impl AgentBackend {
-    pub fn new(provider_name: String, model: String) -> Self {
+    pub fn new(provider_name: String, model: String, fast_model: String) -> Self {
         Self {
             provider_name,
             model,
+            fast_model,
         }
     }
 
     /// Create backend from Git-Iris configuration
     pub fn from_config(config: &Config) -> Result<Self> {
+        let provider: crate::providers::Provider = config
+            .default_provider
+            .parse()
+            .map_err(|_| anyhow::anyhow!("Invalid provider: {}", config.default_provider))?;
+
         let provider_config = config
             .get_provider_config(&config.default_provider)
             .ok_or_else(|| {
@@ -30,7 +39,8 @@ impl AgentBackend {
 
         Ok(Self {
             provider_name: config.default_provider.clone(),
-            model: provider_config.model.clone(),
+            model: provider_config.effective_model(provider).to_string(),
+            fast_model: provider_config.effective_fast_model(provider).to_string(),
         })
     }
 }
diff --git a/src/agents/debug.rs b/src/agents/debug.rs
@@ -181,7 +181,7 @@ pub fn debug_tool_call(tool_name: &str, args: &str) {
 
     if !args.is_empty() {
         let truncated = if args.len() > 200 {
-            format!("{}...", &args[..200])
+            format!("{}...", truncate_at_char_boundary(args, 200))
         } else {
             args.to_string()
         };
@@ -193,14 +193,27 @@ pub fn debug_tool_call(tool_name: &str, args: &str) {
     }
 }
 
+/// Safely truncate a string at a character boundary
+fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
+    if s.len() <= max_bytes {
+        return s;
+    }
+    // Find the last valid char boundary at or before max_bytes
+    let mut end = max_bytes;
+    while end > 0 && !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
 /// Print tool response information
 pub fn debug_tool_response(tool_name: &str, response: &str, duration: Duration) {
     if !is_debug_enabled() {
         return;
     }
 
     let truncated = if response.len() > 500 {
-        format!("{}...", &response[..500])
+        format!("{}...", truncate_at_char_boundary(response, 500))
     } else {
         response.to_string()
     };
@@ -255,7 +268,7 @@ pub fn debug_llm_request(prompt: &str, max_tokens: Option<usize>) {
     let lines: Vec<&str> = prompt.lines().take(5).collect();
     for line in lines {
         let truncated = if line.len() > 120 {
-            format!("{}...", &line[..120])
+            format!("{}...", truncate_at_char_boundary(line, 120))
         } else {
             line.to_string()
         };
@@ -364,7 +377,7 @@ pub fn debug_llm_response(response: &str, duration: Duration, tokens_used: Optio
     let truncated = if response.len() > 1000 {
         format!(
             "{}...\n\n... ({} more characters)",
-            &response[..1000],
+            truncate_at_char_boundary(response, 1000),
             response.len() - 1000
         )
     } else {
@@ -395,7 +408,7 @@ pub fn debug_json_parse_attempt(json_str: &str) {
 
     // Show first 500 chars
     let head = if json_str.len() > 500 {
-        format!("{}...", &json_str[..500])
+        format!("{}...", truncate_at_char_boundary(json_str, 500))
     } else {
         json_str.to_string()
     };
@@ -404,7 +417,11 @@ pub fn debug_json_parse_attempt(json_str: &str) {
     // Show last 200 chars to see where it got cut off
     if json_str.len() > 700 {
         println!("\n... truncated ...\n");
-        let tail_start = json_str.len().saturating_sub(200);
+        // Find a valid char boundary for the tail
+        let mut tail_start = json_str.len().saturating_sub(200);
+        while tail_start < json_str.len() && !json_str.is_char_boundary(tail_start) {
+            tail_start += 1;
+        }
         println!("{}", &json_str[tail_start..].truecolor(200, 200, 200));
     }
 }
diff --git a/src/agents/iris.rs b/src/agents/iris.rs
@@ -324,6 +324,8 @@ where
 pub struct IrisAgent {
     provider: String,
     model: String,
+    /// Fast model for subagents and simple tasks
+    fast_model: Option<String>,
     /// Current capability/task being executed
     current_capability: Option<String>,
     /// Provider configuration
@@ -343,13 +345,19 @@ impl IrisAgent {
         Ok(Self {
             provider: provider.to_string(),
             model: model.to_string(),
+            fast_model: None,
             current_capability: None,
             provider_config: HashMap::new(),
             preamble: None,
             config: None,
         })
     }
 
+    /// Get the effective fast model (configured or same as main model)
+    fn effective_fast_model(&self) -> &str {
+        self.fast_model.as_deref().unwrap_or(&self.model)
+    }
+
     /// Build the actual agent for execution
     fn build_agent(&self) -> Result<Agent<impl CompletionModel + 'static>> {
         use crate::agents::debug_tool::DebugTool;
@@ -396,9 +404,11 @@ You have access to Git tools, code analysis tools, and powerful sub-agent capabi
 
         // Build a simple sub-agent that can be delegated to
         // This sub-agent has tools but cannot spawn more sub-agents (prevents recursion)
+        // Uses fast model for cost efficiency since subagent tasks are focused/bounded
+        let fast_model = self.effective_fast_model();
         let client_builder = DynClientBuilder::new();
         let sub_agent_builder = client_builder
-            .agent(&self.provider, &self.model)
+            .agent(&self.provider, fast_model)
             .map_err(|e| anyhow::anyhow!("Failed to create sub-agent: {}", e))?
             .name("analyze_subagent")
             .description("Delegate focused analysis tasks to a sub-agent with its own context window. Use for analyzing specific files, commits, or code sections independently. The sub-agent has access to Git tools (diff, log, status) and file analysis tools.")
@@ -438,9 +448,10 @@ Guidelines:
             // Workspace for Iris's notes and task management
             .tool(DebugTool::new(Workspace::new()))
             // Parallel analysis for distributing work across multiple subagents
+            // Uses fast model for cost efficiency
             .tool(DebugTool::new(ParallelAnalyze::new(
                 &self.provider,
-                &self.model,
+                fast_model,
             )))
             // Sub-agent delegation (Rig's built-in agent-as-tool!)
             .tool(sub_agent)
@@ -490,7 +501,12 @@ Guidelines:
         let agent = self.build_agent()?;
         debug::debug_context_management(
             "Agent built with tools",
-            &format!("Provider: {}, Model: {}", self.provider, self.model),
+            &format!(
+                "Provider: {}, Model: {} (fast: {})",
+                self.provider,
+                self.model,
+                self.effective_fast_model()
+            ),
         );
 
         // Create JSON schema for the response type
@@ -816,6 +832,11 @@ Guidelines:
     pub fn set_config(&mut self, config: crate::config::Config) {
         self.config = Some(config);
     }
+
+    /// Set fast model for subagents
+    pub fn set_fast_model(&mut self, fast_model: String) {
+        self.fast_model = Some(fast_model);
+    }
 }
 
 /// Builder for creating `IrisAgent` instances with different configurations
diff --git a/src/agents/setup.rs b/src/agents/setup.rs
@@ -67,8 +67,9 @@ impl AgentSetupService {
             .with_model(&backend.model)
             .build()?;
 
-        // Pass config to agent for gitmoji and other features
+        // Pass config and fast model to agent
         agent.set_config(self.config.clone());
+        agent.set_fast_model(backend.fast_model);
 
         Ok(agent)
     }
@@ -190,16 +191,18 @@ pub struct IrisAgentService {
     git_repo: Option<Arc<GitRepo>>,
     provider: String,
     model: String,
+    fast_model: String,
 }
 
 impl IrisAgentService {
     /// Create a new service with explicit provider configuration
-    pub fn new(config: Config, provider: String, model: String) -> Self {
+    pub fn new(config: Config, provider: String, model: String, fast_model: String) -> Self {
         Self {
             config,
             git_repo: None,
             provider,
             model,
+            fast_model,
         }
     }
 
@@ -219,7 +222,12 @@ impl IrisAgentService {
         // Determine backend (provider/model) from config
         let backend = AgentBackend::from_config(&config)?;
 
-        let mut service = Self::new(config, backend.provider_name, backend.model);
+        let mut service = Self::new(
+            config,
+            backend.provider_name,
+            backend.model,
+            backend.fast_model,
+        );
 
         // Setup git repo
         if let Some(repo_url) = repository_url {
@@ -312,8 +320,9 @@ impl IrisAgentService {
             .with_model(&self.model)
             .build()?;
 
-        // Pass config to agent for gitmoji and other features
+        // Pass config and fast model to agent
         agent.set_config(self.config.clone());
+        agent.set_fast_model(self.fast_model.clone());
 
         Ok(agent)
     }
@@ -342,4 +351,9 @@ impl IrisAgentService {
     pub fn model(&self) -> &str {
         &self.model
     }
+
+    /// Get the fast model name (for subagents and simple tasks)
+    pub fn fast_model(&self) -> &str {
+        &self.fast_model
+    }
 }
diff --git a/src/agents/status.rs b/src/agents/status.rs
@@ -7,6 +7,18 @@ use ratatui::style::Color;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 
+/// Safely truncate a string at a character boundary
+fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
+    if s.len() <= max_bytes {
+        return s;
+    }
+    let mut end = max_bytes;
+    while end > 0 && !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
 /// Status phases for the Iris agent
 #[derive(Debug, Clone, PartialEq)]
 pub enum IrisPhase {
@@ -73,7 +85,7 @@ impl IrisStatus {
 
         // Constrain message to 80 characters as requested
         let constrained_message = if message.len() > 80 {
-            format!("{}...", &message[..77])
+            format!("{}...", truncate_at_char_boundary(&message, 77))
         } else {
             message
         };
@@ -99,7 +111,7 @@ impl IrisStatus {
     ) -> Self {
         // Constrain message to 80 characters
         let constrained_message = if message.len() > 80 {
-            format!("{}...", &message[..77])
+            format!("{}...", truncate_at_char_boundary(&message, 77))
         } else {
             message
         };
@@ -133,7 +145,7 @@ impl IrisStatus {
     /// Create error status
     pub fn error(error: &str) -> Self {
         let constrained_message = if error.len() > 35 {
-            format!("❌ {}...", &error[..32])
+            format!("❌ {}...", truncate_at_char_boundary(error, 32))
         } else {
             format!("❌ {error}")
         };
diff --git a/src/agents/tools/parallel_analyze.rs b/src/agents/tools/parallel_analyze.rs
@@ -18,6 +18,7 @@ use std::sync::Arc;
 use tokio::sync::Mutex;
 
 use super::{CodeSearch, FileRead, GitChangedFiles, GitDiff, GitLog, GitStatus, ProjectDocs};
+use crate::agents::debug as agent_debug;
 use crate::agents::debug_tool::DebugTool;
 
 /// Arguments for parallel analysis
@@ -157,6 +158,7 @@ impl SubagentRunner {
 /// Spawns multiple subagents to analyze different aspects concurrently
 pub struct ParallelAnalyze {
     runner: SubagentRunner,
+    model: String,
 }
 
 impl ParallelAnalyze {
@@ -170,7 +172,10 @@ impl ParallelAnalyze {
             SubagentRunner::new("openai", "gpt-4o").expect("OpenAI fallback should work")
         });
 
-        Self { runner }
+        Self {
+            runner,
+            model: model.to_string(),
+        }
     }
 }
 
@@ -221,9 +226,9 @@ impl Tool for ParallelAnalyze {
         let tasks = args.tasks;
         let num_tasks = tasks.len();
 
-        tracing::info!(
-            "🔀 ParallelAnalyze: Spawning {} subagents for parallel analysis",
-            num_tasks
+        agent_debug::debug_context_management(
+            "ParallelAnalyze",
+            &format!("Spawning {} subagents (fast model: {})", num_tasks, self.model),
         );
 
         // Collect results using Arc<Mutex> for thread-safe access
@@ -236,16 +241,8 @@ impl Tool for ParallelAnalyze {
             let results = Arc::clone(&results);
 
             let handle = tokio::spawn(async move {
-                tracing::debug!("🔹 Subagent starting: {}", &task[..task.len().min(50)]);
-
                 let result = runner.run_task(&task).await;
 
-                tracing::debug!(
-                    "🔹 Subagent completed: {} (success: {})",
-                    &task[..task.len().min(50)],
-                    result.success
-                );
-
                 let mut guard = results.lock().await;
                 guard.push(result);
             });
@@ -256,7 +253,7 @@ impl Tool for ParallelAnalyze {
         // Wait for all tasks to complete
         for handle in handles {
             if let Err(e) = handle.await {
-                tracing::warn!("Subagent task panicked: {}", e);
+                agent_debug::debug_warning(&format!("Subagent task panicked: {}", e));
             }
         }
 
@@ -269,11 +266,9 @@ impl Tool for ParallelAnalyze {
         let successful = final_results.iter().filter(|r| r.success).count();
         let failed = final_results.iter().filter(|r| !r.success).count();
 
-        tracing::info!(
-            "🔀 ParallelAnalyze complete: {}/{} successful in {}ms",
-            successful,
-            num_tasks,
-            execution_time_ms
+        agent_debug::debug_context_management(
+            "ParallelAnalyze",
+            &format!("{}/{} successful in {}ms", successful, num_tasks, execution_time_ms),
         );
 
         Ok(ParallelAnalyzeResult {
diff --git a/tests/agent_tests.rs b/tests/agent_tests.rs