Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/autofix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- name: Cargo Clippy
run: cargo +nightly clippy --all-features --workspace --all-targets --fix --allow-dirty -- -D warnings
- name: Cargo Clippy String Safety
run: cargo +nightly clippy --all-features --workspace -- -D clippy::string_slice -D clippy::indexing_slicing
run: cargo +nightly clippy --all-features --workspace -- -D clippy::string_slice -D clippy::indexing_slicing -D clippy::disallowed_methods
- name: Autofix
uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8
concurrency:
Expand Down
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ aws-smithy-runtime-api = "1.11.3"
aws-smithy-async = { version = "1.2.11", features = ["rt-tokio"] }
aws-smithy-runtime = { version = "1.10", features = ["connector-hyper-0-14-x", "tls-rustls"] }
base64 = "0.22.1"
bstr = "1.12.1"
bytes = "1.11.1"
chrono = { version = "0.4.44", features = ["serde"] }
clap = { version = "4.6.0", features = ["derive"] }
Expand Down
3 changes: 3 additions & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
disallowed-methods = [
{ path = "std::string::String::from_utf8_lossy", reason = "Prefer bstr ByteSlice decoding so lossy UTF-8 handling stays explicit and consistent across byte-oriented paths.", replacement = "bstr::ByteSlice::to_str_lossy" },
]
2 changes: 2 additions & 0 deletions crates/forge_ci/src/jobs/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ pub fn clippy_string_safety_cmd(fix: bool) -> String {
"clippy::string_slice",
"-D",
"clippy::indexing_slicing",
"-D",
"clippy::disallowed_methods",
]);

cargo_cmd(&parts)
Expand Down
1 change: 1 addition & 0 deletions crates/forge_fs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ sha2.workspace = true
hex.workspace = true
infer = "0.19.0" # For binary file detection
thiserror = "2.0"
bstr.workspace = true

forge_domain.workspace = true

Expand Down
3 changes: 2 additions & 1 deletion crates/forge_fs/src/read.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::path::Path;

use anyhow::{Context, Result};
use bstr::ByteSlice;

impl crate::ForgeFS {
pub async fn read_utf8<T: AsRef<Path>>(path: T) -> Result<String> {
Self::read(path)
.await
.map(|bytes| String::from_utf8_lossy(&bytes).to_string())
.map(|bytes| bytes.to_str_lossy().to_string())
}

pub async fn read<T: AsRef<Path>>(path: T) -> Result<Vec<u8>> {
Expand Down
3 changes: 2 additions & 1 deletion crates/forge_fs/src/read_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::cmp;
use std::path::Path;

use anyhow::{Context, Result};
use bstr::ByteSlice;
use forge_domain::FileInfo;

use crate::error::Error;
Expand Down Expand Up @@ -50,7 +51,7 @@ impl crate::ForgeFS {
let content = tokio::fs::read(path_ref)
.await
.with_context(|| format!("Failed to read file content from {}", path_ref.display()))?;
let content = String::from_utf8_lossy(&content);
let content = content.to_str_lossy();

// Hash the full file content so callers get a stable, whole-file hash
// that matches what the external-change detector reads back from disk.
Expand Down
1 change: 1 addition & 0 deletions crates/forge_infra/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ tokio.workspace = true
serde_json.workspace = true
reqwest.workspace = true

bstr.workspace = true
bytes.workspace = true
pretty_assertions.workspace = true
forge_select.workspace = true
Expand Down
4 changes: 3 additions & 1 deletion crates/forge_infra/src/console.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ mod tests {
use std::io::Cursor;
use std::thread;

use bstr::ByteSlice;

use super::*;

#[test]
Expand Down Expand Up @@ -112,7 +114,7 @@ mod tests {
assert!(
valid_orderings.contains(&actual),
"Output was interleaved: {:?}",
String::from_utf8_lossy(&actual)
actual.as_slice().to_str_lossy()
);
}

Expand Down
114 changes: 110 additions & 4 deletions crates/forge_infra/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use bstr::ByteSlice;
use forge_app::CommandInfra;
use forge_domain::{CommandOutput, ConsoleWriter as OutputPrinterTrait, Environment};
use tokio::io::AsyncReadExt;
Expand Down Expand Up @@ -140,8 +141,8 @@ impl ForgeCommandExecutorService {
drop(ready);

Ok(CommandOutput {
stdout: String::from_utf8_lossy(&stdout_buffer).into_owned(),
stderr: String::from_utf8_lossy(&stderr_buffer).into_owned(),
stdout: stdout_buffer.to_str_lossy().into_owned(),
stderr: stderr_buffer.to_str_lossy().into_owned(),
exit_code: status.code(),
command,
})
Expand Down Expand Up @@ -190,20 +191,52 @@ async fn stream<A: AsyncReadExt + Unpin, W: Write>(
let mut output = Vec::new();
if let Some(io) = io.as_mut() {
let mut buff = [0; 1024];
// Carry incomplete trailing UTF-8 codepoint bytes across reads — Windows
// console stdio rejects even one byte of a split codepoint.
let mut pending = Vec::<u8>::new();
loop {
let n = io.read(&mut buff).await?;
if n == 0 {
break;
}
writer.write_all(buff.get(..n).unwrap_or(&[]))?;
let chunk = buff.get(..n).unwrap_or(&[]);
output.extend_from_slice(chunk);

let mut working = std::mem::take(&mut pending);
working.extend_from_slice(chunk);
pending = write_lossy_utf8(&mut writer, &working)?;
// note: flush is necessary else we get the cursor could not be found error.
writer.flush()?;
output.extend_from_slice(buff.get(..n).unwrap_or(&[]));
}
// Flush dangling bytes from a stream that ended mid-codepoint.
if !pending.is_empty() {
writer.write_all(pending.to_str_lossy().as_bytes())?;
writer.flush()?;
}
}
Ok(output)
}

/// Writes `buf` as valid UTF-8 (invalid bytes → `U+FFFD`) and returns any
/// incomplete trailing codepoint bytes for the caller to carry into the next
/// chunk.
fn write_lossy_utf8<W: Write>(writer: &mut W, buf: &[u8]) -> io::Result<Vec<u8>> {
let mut chunks = ByteSlice::utf8_chunks(buf).peekable();

while let Some(chunk) = chunks.next() {
writer.write_all(chunk.valid().as_bytes())?;

if !chunk.invalid().is_empty() {
if chunk.incomplete() && chunks.peek().is_none() {
return Ok(chunk.invalid().to_vec());
}
writer.write_all("\u{FFFD}".as_bytes())?;
}
}

Ok(Vec::new())
}

/// The implementation for CommandExecutorService
#[async_trait::async_trait]
impl CommandInfra for ForgeCommandExecutorService {
Expand Down Expand Up @@ -429,4 +462,77 @@ mod tests {
assert_eq!(actual.stderr, expected.stderr);
assert_eq!(actual.success(), expected.success());
}

mod write_lossy_utf8 {
use pretty_assertions::assert_eq;

use super::super::write_lossy_utf8;

fn run(buf: &[u8]) -> (Vec<u8>, Vec<u8>) {
let mut out = Vec::<u8>::new();
let pending = write_lossy_utf8(&mut out, buf).unwrap();
(out, pending)
}

#[test]
fn valid_ascii_passes_through() {
let (out, pending) = run(b"hello");
assert_eq!(out, b"hello");
assert!(pending.is_empty());
}

#[test]
fn valid_multibyte_passes_through() {
// "héllo ✓" — mixed 2-byte and 3-byte codepoints.
let input = "héllo ✓".as_bytes();
let (out, pending) = run(input);
assert_eq!(out, input);
assert!(pending.is_empty());
}

#[test]
fn incomplete_trailing_codepoint_is_buffered() {
// "é" is 0xC3 0xA9 — leading byte alone must be held back.
let (out, pending) = run(&[b'a', 0xC3]);
assert_eq!(out, b"a");
assert_eq!(pending, vec![0xC3]);
}

#[test]
fn multibyte_split_across_two_chunks_emits_once_whole() {
let mut out = Vec::<u8>::new();
let pending = write_lossy_utf8(&mut out, &[b'a', 0xC3]).unwrap();
assert_eq!(pending, vec![0xC3]);
assert_eq!(out, b"a");

let mut working = pending;
working.push(0xA9);
let pending = write_lossy_utf8(&mut out, &working).unwrap();
assert!(pending.is_empty());
assert_eq!(out, "aé".as_bytes());
}

#[test]
fn invalid_byte_in_middle_becomes_replacement() {
let (out, pending) = run(&[b'a', 0xFF, b'b']);
assert_eq!(out, "a\u{FFFD}b".as_bytes());
assert!(pending.is_empty());
}

#[test]
fn lone_continuation_byte_becomes_replacement() {
let (out, pending) = run(&[b'a', 0x80, b'b']);
assert_eq!(out, "a\u{FFFD}b".as_bytes());
assert!(pending.is_empty());
}

#[test]
fn windows_1252_smart_quote_becomes_replacement() {
// Regression: 0x91/0x92 land as bare continuation bytes and broke
// console stdio on Windows before this fix.
let (out, pending) = run(b"quote: \x91hi\x92");
assert_eq!(out, "quote: \u{FFFD}hi\u{FFFD}".as_bytes());
assert!(pending.is_empty());
}
}
}
5 changes: 3 additions & 2 deletions crates/forge_infra/src/mcp_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::str::FromStr;
use std::sync::{Arc, OnceLock, RwLock};

use backon::{ExponentialBuilder, Retryable};
use bstr::ByteSlice;
use forge_app::McpClientInfra;
use forge_domain::{
Environment, Image, McpHttpServer, McpServerConfig, ToolDefinition, ToolName, ToolOutput,
Expand Down Expand Up @@ -453,7 +454,7 @@ impl ForgeMcpClient {
// Read the HTTP request
let mut buf = vec![0u8; 4096];
let n = stream.read(&mut buf).await?;
let request = String::from_utf8_lossy(buf.get(..n).unwrap_or(&[]));
let request = buf.get(..n).unwrap_or(&[]).to_str_lossy();
let first_line = request.lines().next().unwrap_or("");
let path = first_line.split_whitespace().nth(1).unwrap_or("/");

Expand Down Expand Up @@ -681,7 +682,7 @@ pub async fn mcp_auth(server_url: &str, env: &Environment) -> anyhow::Result<()>
use tokio::io::{AsyncReadExt, AsyncWriteExt};
let mut buf = vec![0u8; 4096];
let n = stream.read(&mut buf).await?;
let request = String::from_utf8_lossy(buf.get(..n).unwrap_or(&[]));
let request = buf.get(..n).unwrap_or(&[]).to_str_lossy();
let first_line = request.lines().next().unwrap_or("");
let path = first_line.split_whitespace().nth(1).unwrap_or("/");
let query_start = path.find('?').unwrap_or(path.len());
Expand Down
1 change: 1 addition & 0 deletions crates/forge_main/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ forge_config.workspace = true
forge_walker.workspace = true
forge_display.workspace = true
forge_tracker.workspace = true
bstr.workspace = true

forge_spinner.workspace = true
forge_select.workspace = true
Expand Down
3 changes: 2 additions & 1 deletion crates/forge_main/src/sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::path::PathBuf;
use std::process::Command;

use anyhow::{Context, Result, bail};
use bstr::ByteSlice;
use forge_domain::TitleFormat;

use crate::title_display::TitleDisplayExt;
Expand Down Expand Up @@ -120,7 +121,7 @@ impl<'a> Sandbox<'a> {
.context("Failed to create git worktree")?;

if !worktree_output.status.success() {
let stderr = String::from_utf8_lossy(&worktree_output.stderr);
let stderr = worktree_output.stderr.to_str_lossy();
bail!("Failed to create git worktree: {stderr}");
}

Expand Down
10 changes: 6 additions & 4 deletions crates/forge_main/src/stream_renderer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::borrow::Cow;
use std::io;
use std::sync::{Arc, Mutex};

use anyhow::Result;
use bstr::ByteSlice;
use colored::Colorize;
use forge_domain::ConsoleWriter;
use forge_markdown_stream::StreamdownRenderer;
Expand Down Expand Up @@ -198,11 +200,11 @@ impl<P: ConsoleWriter> io::Write for StreamDirectWriter<P> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.pause_spinner();

let content = match std::str::from_utf8(buf) {
Ok(s) => s.to_string(),
Err(_) => String::from_utf8_lossy(buf).into_owned(),
let content = match buf.to_str() {
Ok(content) => Cow::Borrowed(content),
Err(_) => buf.to_str_lossy(),
};
let styled = self.style.apply(content);
let styled = self.style.apply(content.into_owned());
self.printer.write(styled.as_bytes())?;
self.printer.flush()?;

Expand Down
1 change: 1 addition & 0 deletions crates/forge_services/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ serde_yml.workspace = true
gray_matter.workspace = true
merge.workspace = true
strip-ansi-escapes.workspace = true
bstr.workspace = true
forge_app.workspace = true
url.workspace = true
reqwest-eventsource.workspace = true
Expand Down
5 changes: 3 additions & 2 deletions crates/forge_services/src/tool_services/fs_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::{Context, anyhow};
use bstr::ByteSlice;
use forge_app::{
FileInfoInfra, FileReaderInfra, FsSearchService, Match, MatchResult, SearchResult, Walker,
WalkerInfra,
Expand Down Expand Up @@ -386,7 +387,7 @@ impl Sink for ContextSink {
// Store the current match (before_context is already accumulated, after_context
// will be added via context() calls)
let line_num = mat.line_number().unwrap_or(0) as usize;
let line = String::from_utf8_lossy(mat.bytes()).trim_end().to_string();
let line = mat.bytes().to_str_lossy().trim_end().to_string();
self.current_match = Some((line_num, line));

Ok(true)
Expand All @@ -397,7 +398,7 @@ impl Sink for ContextSink {
_searcher: &Searcher,
ctx: &SinkContext<'_>,
) -> Result<bool, Self::Error> {
let line = String::from_utf8_lossy(ctx.bytes()).trim_end().to_string();
let line = ctx.bytes().to_str_lossy().trim_end().to_string();

match ctx.kind() {
SinkContextKind::Before => {
Expand Down
Loading
Loading