Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/loopal-backend/src/limits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ pub struct ResourceLimits {
pub max_fetch_bytes: usize,
/// Default shell command timeout.
pub default_timeout: Duration,
/// Cooperative deadline checked between glob/grep walk entries: bounds
/// slow-but-responsive trees and returns partial results. A syscall stuck
/// on a dead mount can't be interrupted here — that case is bounded by the
/// runtime per-tool watchdog instead.
pub walk_timeout: Duration,
/// HTTP fetch timeout.
pub fetch_timeout: Duration,
/// Maximum image file size in bytes.
Expand All @@ -40,6 +45,7 @@ impl Default for ResourceLimits {
max_grep_matches: 500,
max_fetch_bytes: 5 * 1024 * 1024, // 5 MB
default_timeout: Duration::from_secs(300), // 5 min
walk_timeout: Duration::from_secs(30),
fetch_timeout: Duration::from_secs(30),
image_max_bytes: IMAGE_MAX_BYTES,
image_max_pixels: IMAGE_MAX_PIXELS,
Expand Down
103 changes: 65 additions & 38 deletions crates/loopal-backend/src/search/glob.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
//! Glob pattern search with file-type filtering and modification time.

use std::path::Path;
use std::time::UNIX_EPOCH;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Instant, UNIX_EPOCH};

use globset::Glob;
use ignore::WalkState;
use loopal_error::ToolIoError;
use loopal_tool_api::backend_types::{GlobEntry, GlobOptions, GlobSearchResult};
use loopal_tool_api::save_to_overflow_file;
use parking_lot::Mutex;

use crate::limits::ResourceLimits;
use crate::search::{overflow_fmt, walker};

/// Execute a glob search and return matching entries.
pub fn glob_search(
opts: &GlobOptions,
cwd: &Path,
Expand All @@ -25,50 +26,75 @@ pub fn glob_search(

let glob =
Glob::new(&opts.pattern).map_err(|e| ToolIoError::Other(format!("invalid glob: {e}")))?;
let matcher = glob.compile_matcher();
let max = opts.max_results.min(limits.max_glob_results).max(1);

let max = opts.max_results.min(limits.max_glob_results);
let Some(walker) = walker::build_walker(&search_path, opts.type_filter.as_deref()) else {
let Some(w) = walker::build_walker(&search_path, opts.type_filter.as_deref()) else {
return Ok(GlobSearchResult {
entries: Vec::new(),
truncated: false,
timed_out: false,
overflow_path: None,
});
};

let mut entries = Vec::new();
let mut truncated = false;

for entry in walker.build().flatten() {
if !entry.file_type().is_some_and(|ft| ft.is_file()) {
continue;
}
let path = entry.path();
let rel = match path.strip_prefix(&search_path) {
Ok(r) => r,
Err(_) => continue,
};
if !matcher.is_match(rel) {
continue;
}
let modified_secs = entry
.metadata()
.ok()
.and_then(|m| m.modified().ok())
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs());

entries.push(GlobEntry {
path: path.to_string_lossy().into_owned(),
modified_secs,
});
let deadline = Instant::now() + limits.walk_timeout;
let done = Arc::new(AtomicBool::new(false));
let timed_out = Arc::new(AtomicBool::new(false));
let entries: Arc<Mutex<Vec<GlobEntry>>> = Arc::new(Mutex::new(Vec::new()));
let search_path = Arc::new(search_path);
let matcher = Arc::new(glob.compile_matcher());

if entries.len() >= max {
truncated = true;
break;
}
}
w.build_parallel().run(|| {
let done = Arc::clone(&done);
let timed_out = Arc::clone(&timed_out);
let entries = Arc::clone(&entries);
let search_path = Arc::clone(&search_path);
let matcher = Arc::clone(&matcher);
Box::new(move |entry| {
if done.load(Ordering::Relaxed) {
return WalkState::Quit;
}
if Instant::now() >= deadline {
done.store(true, Ordering::Relaxed);
timed_out.store(true, Ordering::Relaxed);
return WalkState::Quit;
}
let Ok(entry) = entry else {
return WalkState::Continue;
};
if !entry.file_type().is_some_and(|ft| ft.is_file()) {
return WalkState::Continue;
}
let Ok(rel) = entry.path().strip_prefix(search_path.as_path()) else {
return WalkState::Continue;
};
if !matcher.is_match(rel) {
return WalkState::Continue;
}
let modified_secs = entry
.metadata()
.ok()
.and_then(|m| m.modified().ok())
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
.map(|d| d.as_secs());
let n = {
let mut guard = entries.lock();
guard.push(GlobEntry {
path: entry.path().to_string_lossy().into_owned(),
modified_secs,
});
guard.len()
};
if n >= max {
done.store(true, Ordering::Relaxed);
return WalkState::Quit;
}
WalkState::Continue
})
});

let entries = Arc::try_unwrap(entries).unwrap().into_inner();
let truncated = entries.len() >= max;
let overflow_path = if truncated {
Some(save_to_overflow_file(
&overflow_fmt::serialize_glob_results(&entries),
Expand All @@ -81,6 +107,7 @@ pub fn glob_search(
Ok(GlobSearchResult {
entries,
truncated,
timed_out: timed_out.load(Ordering::Relaxed),
overflow_path,
})
}
14 changes: 12 additions & 2 deletions crates/loopal-backend/src/search/grep.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::time::Instant;

use globset::Glob;
use ignore::WalkState;
Expand Down Expand Up @@ -55,12 +56,14 @@ pub fn grep_search(
None => None,
};

let max = opts.max_matches.min(limits.max_grep_matches);
let max = opts.max_matches.min(limits.max_grep_matches).max(1);
let ctx_before = opts.context_before;
let ctx_after = opts.context_after;
let multiline = opts.multiline;
let total = Arc::new(AtomicUsize::new(0));
let done = Arc::new(AtomicBool::new(false));
let timed_out = Arc::new(AtomicBool::new(false));
let deadline = Instant::now() + limits.walk_timeout;
let results: Arc<Mutex<Vec<FileMatchResult>>> = Arc::new(Mutex::new(Vec::new()));
let search_path = Arc::new(search_path);
let glob_matcher = Arc::new(glob_matcher);
Expand All @@ -74,11 +77,17 @@ pub fn grep_search(
let search_path = Arc::clone(&search_path);
let total = Arc::clone(&total);
let done = Arc::clone(&done);
let timed_out = Arc::clone(&timed_out);
let results = Arc::clone(&results);
Box::new(move |entry| {
if done.load(Ordering::Relaxed) {
return WalkState::Quit;
}
if Instant::now() >= deadline {
done.store(true, Ordering::Relaxed);
timed_out.store(true, Ordering::Relaxed);
return WalkState::Quit;
}
let entry = match entry {
Ok(e) => e,
Err(_) => return WalkState::Continue,
Expand All @@ -100,9 +109,10 @@ pub fn grep_search(
});

let file_matches = Arc::try_unwrap(results).unwrap().into_inner();
let truncated = done.load(Ordering::Relaxed);
let truncated = total.load(Ordering::Relaxed) >= max;
Ok(GrepSearchResult {
total_match_count: total.load(Ordering::Relaxed),
timed_out: timed_out.load(Ordering::Relaxed),
overflow_path: maybe_save_overflow(truncated, &file_matches),
file_matches,
})
Expand Down
2 changes: 2 additions & 0 deletions crates/loopal-backend/src/search/grep_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub(crate) fn empty_result() -> GrepSearchResult {
GrepSearchResult {
file_matches: Vec::new(),
total_match_count: 0,
timed_out: false,
overflow_path: None,
}
}
Expand Down Expand Up @@ -90,6 +91,7 @@ pub(crate) fn search_single_file(
let overflow_path = maybe_save_overflow(truncated, &file_matches);
Ok(GrepSearchResult {
total_match_count: count,
timed_out: false,
file_matches,
overflow_path,
})
Expand Down
4 changes: 2 additions & 2 deletions crates/loopal-backend/src/search/walker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ use ignore::types::TypesBuilder;

/// Build a `WalkBuilder` with shared defaults.
///
/// * Follows symlinks.
/// * Does not follow symlinks (ripgrep default) — avoids cross-mount escape and traversal cycles.
/// * Respects `.gitignore` (ignore crate default).
/// * Applies file-type filtering when `type_filter` is given.
///
/// Returns `None` when `type_filter` names an unknown file type — the
/// caller should short-circuit with an empty result.
pub fn build_walker(search_path: &Path, type_filter: Option<&str>) -> Option<WalkBuilder> {
let mut builder = WalkBuilder::new(search_path);
builder.follow_links(true);
builder.follow_links(false);

if let Some(ty) = type_filter {
let mut tb = TypesBuilder::new();
Expand Down
4 changes: 4 additions & 0 deletions crates/loopal-backend/tests/suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ mod approved_paths_test;
mod batch_test;
#[path = "suite/fetch_headers_test.rs"]
mod fetch_headers_test;
#[path = "suite/glob_parallel_test.rs"]
mod glob_parallel_test;
#[path = "suite/image_test.rs"]
mod image_test;
#[path = "suite/log_file_test.rs"]
Expand All @@ -15,5 +17,7 @@ mod path_approval_test;
mod process_group_test;
#[path = "suite/resolve_checked_test.rs"]
mod resolve_checked_test;
#[path = "suite/search_timeout_test.rs"]
mod search_timeout_test;
#[path = "suite/tmp_cleanup_test.rs"]
mod tmp_cleanup_test;
76 changes: 76 additions & 0 deletions crates/loopal-backend/tests/suite/glob_parallel_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use loopal_backend::ResourceLimits;
use loopal_backend::search::glob_search;
use loopal_tool_api::backend_types::GlobOptions;

/// Build a `GlobOptions` for `pattern` with no explicit search path, no
/// file-type filter, and `max_results` set to 10_000.
fn glob_opts(pattern: &str) -> GlobOptions {
    let pattern = String::from(pattern);
    GlobOptions {
        pattern,
        path: None,
        type_filter: None,
        max_results: 10_000,
    }
}

/// A recursive `**/*.rs` glob must report every `.rs` file at every depth,
/// leave out the non-matching `skip.txt`, and flag neither truncation nor
/// timeout.
#[test]
fn parallel_glob_finds_all_nested_matches() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // Layout: a.rs, skip.txt, sub/b.rs, sub/deep/c.rs
    let deep = root.join("sub/deep");
    std::fs::create_dir_all(&deep).unwrap();
    let fixtures = [
        root.join("a.rs"),
        root.join("sub/b.rs"),
        deep.join("c.rs"),
        root.join("skip.txt"),
    ];
    for file in &fixtures {
        std::fs::write(file, "").unwrap();
    }

    let limits = ResourceLimits::default();
    let res = glob_search(&glob_opts("**/*.rs"), root, &limits).unwrap();

    assert_eq!(res.entries.len(), 3);
    for expected in ["a.rs", "b.rs", "c.rs"] {
        assert!(res.entries.iter().any(|e| e.path.ends_with(expected)));
    }
    assert!(!res.truncated);
    assert!(!res.timed_out);
}

/// With more matching files than `max_glob_results`, the search must report
/// truncation (not a timeout). The entry count is only bounded loosely: at
/// least the cap, but fewer than the total number of files on disk.
#[test]
fn parallel_glob_truncates_at_max_results_with_tolerance() {
    const FILE_COUNT: usize = 50;
    const CAP: usize = 10;

    let tmp = tempfile::tempdir().unwrap();
    (0..FILE_COUNT).for_each(|i| {
        let file = tmp.path().join(format!("f{i:02}.rs"));
        std::fs::write(file, "").unwrap();
    });

    let limits = ResourceLimits {
        max_glob_results: CAP,
        ..ResourceLimits::default()
    };
    let res = glob_search(&glob_opts("**/*.rs"), tmp.path(), &limits).unwrap();
    let found = res.entries.len();

    assert!(res.truncated);
    assert!(!res.timed_out);
    assert!(found >= CAP);
    assert!(found < FILE_COUNT);
}

/// A directory symlink inside the search root that points outside of it must
/// not be traversed: files reachable only through the link stay unreported.
#[cfg(unix)]
#[test]
fn glob_does_not_follow_symlinks() {
    let tmp = tempfile::tempdir().unwrap();
    let (root, outside) = (tmp.path().join("root"), tmp.path().join("outside"));
    for dir in [&root, &outside] {
        std::fs::create_dir_all(dir).unwrap();
    }
    std::fs::write(root.join("real.rs"), "").unwrap();
    std::fs::write(outside.join("secret.rs"), "").unwrap();
    // root/link -> outside; the only route from `root` to secret.rs.
    std::os::unix::fs::symlink(&outside, root.join("link")).unwrap();

    let limits = ResourceLimits::default();
    let res = glob_search(&glob_opts("**/*.rs"), &root, &limits).unwrap();

    let reported = |name: &str| res.entries.iter().any(|e| e.path.ends_with(name));
    assert!(reported("real.rs"));
    assert!(!reported("secret.rs"));
}
Loading
Loading