Skip to content

Commit 485a0f9

Browse files
committed
⚡️ Add fast model support for subagents to reduce API costs
Introduce a separate `fast_model` configuration that subagents and `ParallelAnalyze` use instead of the main model. This allows using cheaper, faster models (e.g., gpt-4o-mini) for bounded tasks while keeping the primary model for complex analysis.

Changes:
- Add `fast_model` field to `AgentBackend` and `IrisAgentService`
- Wire fast model through agent setup and `IrisAgent`
- Update `ParallelAnalyze` to use and log the fast model
- Fix string truncation to respect UTF-8 character boundaries
- Add `truncate_at_char_boundary()` helper to `debug.rs` and `status.rs`
1 parent fcda027 commit 485a0f9

File tree

7 files changed

+112
-37
lines changed

7 files changed

+112
-37
lines changed

src/agents/core.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,28 @@ use crate::git::GitRepo;
99
#[derive(Debug, Clone)]
1010
pub struct AgentBackend {
1111
pub provider_name: String,
12+
/// Primary model for complex tasks
1213
pub model: String,
14+
/// Fast model for simple/bounded tasks (subagents, parsing, etc.)
15+
pub fast_model: String,
1316
}
1417

1518
impl AgentBackend {
16-
pub fn new(provider_name: String, model: String) -> Self {
19+
pub fn new(provider_name: String, model: String, fast_model: String) -> Self {
1720
Self {
1821
provider_name,
1922
model,
23+
fast_model,
2024
}
2125
}
2226

2327
/// Create backend from Git-Iris configuration
2428
pub fn from_config(config: &Config) -> Result<Self> {
29+
let provider: crate::providers::Provider = config
30+
.default_provider
31+
.parse()
32+
.map_err(|_| anyhow::anyhow!("Invalid provider: {}", config.default_provider))?;
33+
2534
let provider_config = config
2635
.get_provider_config(&config.default_provider)
2736
.ok_or_else(|| {
@@ -30,7 +39,8 @@ impl AgentBackend {
3039

3140
Ok(Self {
3241
provider_name: config.default_provider.clone(),
33-
model: provider_config.model.clone(),
42+
model: provider_config.effective_model(provider).to_string(),
43+
fast_model: provider_config.effective_fast_model(provider).to_string(),
3444
})
3545
}
3646
}

src/agents/debug.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ pub fn debug_tool_call(tool_name: &str, args: &str) {
181181

182182
if !args.is_empty() {
183183
let truncated = if args.len() > 200 {
184-
format!("{}...", &args[..200])
184+
format!("{}...", truncate_at_char_boundary(args, 200))
185185
} else {
186186
args.to_string()
187187
};
@@ -193,14 +193,27 @@ pub fn debug_tool_call(tool_name: &str, args: &str) {
193193
}
194194
}
195195

196+
/// Safely truncate a string at a character boundary
197+
fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
198+
if s.len() <= max_bytes {
199+
return s;
200+
}
201+
// Find the last valid char boundary at or before max_bytes
202+
let mut end = max_bytes;
203+
while end > 0 && !s.is_char_boundary(end) {
204+
end -= 1;
205+
}
206+
&s[..end]
207+
}
208+
196209
/// Print tool response information
197210
pub fn debug_tool_response(tool_name: &str, response: &str, duration: Duration) {
198211
if !is_debug_enabled() {
199212
return;
200213
}
201214

202215
let truncated = if response.len() > 500 {
203-
format!("{}...", &response[..500])
216+
format!("{}...", truncate_at_char_boundary(response, 500))
204217
} else {
205218
response.to_string()
206219
};
@@ -255,7 +268,7 @@ pub fn debug_llm_request(prompt: &str, max_tokens: Option<usize>) {
255268
let lines: Vec<&str> = prompt.lines().take(5).collect();
256269
for line in lines {
257270
let truncated = if line.len() > 120 {
258-
format!("{}...", &line[..120])
271+
format!("{}...", truncate_at_char_boundary(line, 120))
259272
} else {
260273
line.to_string()
261274
};
@@ -364,7 +377,7 @@ pub fn debug_llm_response(response: &str, duration: Duration, tokens_used: Optio
364377
let truncated = if response.len() > 1000 {
365378
format!(
366379
"{}...\n\n... ({} more characters)",
367-
&response[..1000],
380+
truncate_at_char_boundary(response, 1000),
368381
response.len() - 1000
369382
)
370383
} else {
@@ -395,7 +408,7 @@ pub fn debug_json_parse_attempt(json_str: &str) {
395408

396409
// Show first 500 chars
397410
let head = if json_str.len() > 500 {
398-
format!("{}...", &json_str[..500])
411+
format!("{}...", truncate_at_char_boundary(json_str, 500))
399412
} else {
400413
json_str.to_string()
401414
};
@@ -404,7 +417,11 @@ pub fn debug_json_parse_attempt(json_str: &str) {
404417
// Show last 200 chars to see where it got cut off
405418
if json_str.len() > 700 {
406419
println!("\n... truncated ...\n");
407-
let tail_start = json_str.len().saturating_sub(200);
420+
// Find a valid char boundary for the tail
421+
let mut tail_start = json_str.len().saturating_sub(200);
422+
while tail_start < json_str.len() && !json_str.is_char_boundary(tail_start) {
423+
tail_start += 1;
424+
}
408425
println!("{}", &json_str[tail_start..].truecolor(200, 200, 200));
409426
}
410427
}

src/agents/iris.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ where
324324
pub struct IrisAgent {
325325
provider: String,
326326
model: String,
327+
/// Fast model for subagents and simple tasks
328+
fast_model: Option<String>,
327329
/// Current capability/task being executed
328330
current_capability: Option<String>,
329331
/// Provider configuration
@@ -343,13 +345,19 @@ impl IrisAgent {
343345
Ok(Self {
344346
provider: provider.to_string(),
345347
model: model.to_string(),
348+
fast_model: None,
346349
current_capability: None,
347350
provider_config: HashMap::new(),
348351
preamble: None,
349352
config: None,
350353
})
351354
}
352355

356+
/// Get the effective fast model (configured or same as main model)
357+
fn effective_fast_model(&self) -> &str {
358+
self.fast_model.as_deref().unwrap_or(&self.model)
359+
}
360+
353361
/// Build the actual agent for execution
354362
fn build_agent(&self) -> Result<Agent<impl CompletionModel + 'static>> {
355363
use crate::agents::debug_tool::DebugTool;
@@ -396,9 +404,11 @@ You have access to Git tools, code analysis tools, and powerful sub-agent capabi
396404

397405
// Build a simple sub-agent that can be delegated to
398406
// This sub-agent has tools but cannot spawn more sub-agents (prevents recursion)
407+
// Uses fast model for cost efficiency since subagent tasks are focused/bounded
408+
let fast_model = self.effective_fast_model();
399409
let client_builder = DynClientBuilder::new();
400410
let sub_agent_builder = client_builder
401-
.agent(&self.provider, &self.model)
411+
.agent(&self.provider, fast_model)
402412
.map_err(|e| anyhow::anyhow!("Failed to create sub-agent: {}", e))?
403413
.name("analyze_subagent")
404414
.description("Delegate focused analysis tasks to a sub-agent with its own context window. Use for analyzing specific files, commits, or code sections independently. The sub-agent has access to Git tools (diff, log, status) and file analysis tools.")
@@ -438,9 +448,10 @@ Guidelines:
438448
// Workspace for Iris's notes and task management
439449
.tool(DebugTool::new(Workspace::new()))
440450
// Parallel analysis for distributing work across multiple subagents
451+
// Uses fast model for cost efficiency
441452
.tool(DebugTool::new(ParallelAnalyze::new(
442453
&self.provider,
443-
&self.model,
454+
fast_model,
444455
)))
445456
// Sub-agent delegation (Rig's built-in agent-as-tool!)
446457
.tool(sub_agent)
@@ -490,7 +501,12 @@ Guidelines:
490501
let agent = self.build_agent()?;
491502
debug::debug_context_management(
492503
"Agent built with tools",
493-
&format!("Provider: {}, Model: {}", self.provider, self.model),
504+
&format!(
505+
"Provider: {}, Model: {} (fast: {})",
506+
self.provider,
507+
self.model,
508+
self.effective_fast_model()
509+
),
494510
);
495511

496512
// Create JSON schema for the response type
@@ -816,6 +832,11 @@ Guidelines:
816832
pub fn set_config(&mut self, config: crate::config::Config) {
817833
self.config = Some(config);
818834
}
835+
836+
/// Set fast model for subagents
837+
pub fn set_fast_model(&mut self, fast_model: String) {
838+
self.fast_model = Some(fast_model);
839+
}
819840
}
820841

821842
/// Builder for creating `IrisAgent` instances with different configurations

src/agents/setup.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,9 @@ impl AgentSetupService {
6767
.with_model(&backend.model)
6868
.build()?;
6969

70-
// Pass config to agent for gitmoji and other features
70+
// Pass config and fast model to agent
7171
agent.set_config(self.config.clone());
72+
agent.set_fast_model(backend.fast_model);
7273

7374
Ok(agent)
7475
}
@@ -190,16 +191,18 @@ pub struct IrisAgentService {
190191
git_repo: Option<Arc<GitRepo>>,
191192
provider: String,
192193
model: String,
194+
fast_model: String,
193195
}
194196

195197
impl IrisAgentService {
196198
/// Create a new service with explicit provider configuration
197-
pub fn new(config: Config, provider: String, model: String) -> Self {
199+
pub fn new(config: Config, provider: String, model: String, fast_model: String) -> Self {
198200
Self {
199201
config,
200202
git_repo: None,
201203
provider,
202204
model,
205+
fast_model,
203206
}
204207
}
205208

@@ -219,7 +222,12 @@ impl IrisAgentService {
219222
// Determine backend (provider/model) from config
220223
let backend = AgentBackend::from_config(&config)?;
221224

222-
let mut service = Self::new(config, backend.provider_name, backend.model);
225+
let mut service = Self::new(
226+
config,
227+
backend.provider_name,
228+
backend.model,
229+
backend.fast_model,
230+
);
223231

224232
// Setup git repo
225233
if let Some(repo_url) = repository_url {
@@ -312,8 +320,9 @@ impl IrisAgentService {
312320
.with_model(&self.model)
313321
.build()?;
314322

315-
// Pass config to agent for gitmoji and other features
323+
// Pass config and fast model to agent
316324
agent.set_config(self.config.clone());
325+
agent.set_fast_model(self.fast_model.clone());
317326

318327
Ok(agent)
319328
}
@@ -342,4 +351,9 @@ impl IrisAgentService {
342351
pub fn model(&self) -> &str {
343352
&self.model
344353
}
354+
355+
/// Get the fast model name (for subagents and simple tasks)
356+
pub fn fast_model(&self) -> &str {
357+
&self.fast_model
358+
}
345359
}

src/agents/status.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ use ratatui::style::Color;
77
use std::sync::{Arc, Mutex};
88
use std::time::{Duration, Instant};
99

10+
/// Safely truncate a string at a character boundary
11+
fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
12+
if s.len() <= max_bytes {
13+
return s;
14+
}
15+
let mut end = max_bytes;
16+
while end > 0 && !s.is_char_boundary(end) {
17+
end -= 1;
18+
}
19+
&s[..end]
20+
}
21+
1022
/// Status phases for the Iris agent
1123
#[derive(Debug, Clone, PartialEq)]
1224
pub enum IrisPhase {
@@ -73,7 +85,7 @@ impl IrisStatus {
7385

7486
// Constrain message to 80 characters as requested
7587
let constrained_message = if message.len() > 80 {
76-
format!("{}...", &message[..77])
88+
format!("{}...", truncate_at_char_boundary(&message, 77))
7789
} else {
7890
message
7991
};
@@ -99,7 +111,7 @@ impl IrisStatus {
99111
) -> Self {
100112
// Constrain message to 80 characters
101113
let constrained_message = if message.len() > 80 {
102-
format!("{}...", &message[..77])
114+
format!("{}...", truncate_at_char_boundary(&message, 77))
103115
} else {
104116
message
105117
};
@@ -133,7 +145,7 @@ impl IrisStatus {
133145
/// Create error status
134146
pub fn error(error: &str) -> Self {
135147
let constrained_message = if error.len() > 35 {
136-
format!("❌ {}...", &error[..32])
148+
format!("❌ {}...", truncate_at_char_boundary(error, 32))
137149
} else {
138150
format!("❌ {error}")
139151
};

src/agents/tools/parallel_analyze.rs

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::sync::Arc;
1818
use tokio::sync::Mutex;
1919

2020
use super::{CodeSearch, FileRead, GitChangedFiles, GitDiff, GitLog, GitStatus, ProjectDocs};
21+
use crate::agents::debug as agent_debug;
2122
use crate::agents::debug_tool::DebugTool;
2223

2324
/// Arguments for parallel analysis
@@ -157,6 +158,7 @@ impl SubagentRunner {
157158
/// Spawns multiple subagents to analyze different aspects concurrently
158159
pub struct ParallelAnalyze {
159160
runner: SubagentRunner,
161+
model: String,
160162
}
161163

162164
impl ParallelAnalyze {
@@ -170,7 +172,10 @@ impl ParallelAnalyze {
170172
SubagentRunner::new("openai", "gpt-4o").expect("OpenAI fallback should work")
171173
});
172174

173-
Self { runner }
175+
Self {
176+
runner,
177+
model: model.to_string(),
178+
}
174179
}
175180
}
176181

@@ -221,9 +226,9 @@ impl Tool for ParallelAnalyze {
221226
let tasks = args.tasks;
222227
let num_tasks = tasks.len();
223228

224-
tracing::info!(
225-
"🔀 ParallelAnalyze: Spawning {} subagents for parallel analysis",
226-
num_tasks
229+
agent_debug::debug_context_management(
230+
"ParallelAnalyze",
231+
&format!("Spawning {} subagents (fast model: {})", num_tasks, self.model),
227232
);
228233

229234
// Collect results using Arc<Mutex> for thread-safe access
@@ -236,16 +241,8 @@ impl Tool for ParallelAnalyze {
236241
let results = Arc::clone(&results);
237242

238243
let handle = tokio::spawn(async move {
239-
tracing::debug!("🔹 Subagent starting: {}", &task[..task.len().min(50)]);
240-
241244
let result = runner.run_task(&task).await;
242245

243-
tracing::debug!(
244-
"🔹 Subagent completed: {} (success: {})",
245-
&task[..task.len().min(50)],
246-
result.success
247-
);
248-
249246
let mut guard = results.lock().await;
250247
guard.push(result);
251248
});
@@ -256,7 +253,7 @@ impl Tool for ParallelAnalyze {
256253
// Wait for all tasks to complete
257254
for handle in handles {
258255
if let Err(e) = handle.await {
259-
tracing::warn!("Subagent task panicked: {}", e);
256+
agent_debug::debug_warning(&format!("Subagent task panicked: {}", e));
260257
}
261258
}
262259

@@ -269,11 +266,9 @@ impl Tool for ParallelAnalyze {
269266
let successful = final_results.iter().filter(|r| r.success).count();
270267
let failed = final_results.iter().filter(|r| !r.success).count();
271268

272-
tracing::info!(
273-
"🔀 ParallelAnalyze complete: {}/{} successful in {}ms",
274-
successful,
275-
num_tasks,
276-
execution_time_ms
269+
agent_debug::debug_context_management(
270+
"ParallelAnalyze",
271+
&format!("{}/{} successful in {}ms", successful, num_tasks, execution_time_ms),
277272
);
278273

279274
Ok(ParallelAnalyzeResult {

0 commit comments

Comments
 (0)