Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,14 @@ wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
# 2) 把单个 attachment_id 解密写出去(扩展名建议保留 .jpg / .mp4 等)
wx extract <attachment_id> -o ~/Desktop/photo.jpg
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
wx extract <attachment_id> -o /tmp/raw.wxgf --raw # 保留原始 WXGF/HEVC 容器
```

`attachments` 输出每条带:`attachment_id` / `kind` / `type` / `local_id` / `timestamp` / `time`,群聊里还有 `sender` 以及稳定身份三件套 `sender_username` / `sender_contact_display` / `sender_group_nickname`(语义同 `history` / `search` / `new-messages`:`sender_username` 是 wxid,用于两个同名成员之间的稳定区分;解析不到 wxid 时这三字段不输出)。当前 `kind` 固定为 `image`;命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。

`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
`extract` 输出报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际写出的图片格式:jpg / png / gif / webp 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。

微信 4 会把部分图片保存成内部 `WXGF/WXAM` 容器(解码后头部为 `wxgf`,报告里的 `source_format` 为 `hevc`)。默认 `wx extract` 会从 WXGF 中提取最大的 HEVC partition,并调用 `ffmpeg` 转成 JPG;报告会额外带 `source_format` / `source_size` / `transcoder` / `wxgf_partition_*`。如果本机没有 `ffmpeg`,请安装后重试,或用 `WX_FFMPEG=/path/to/ffmpeg` 指定路径;确实需要原始容器时传 `--raw`。如果微信里从未点开过该图片,本地通常只有 `_t.dat` 缩略图,先在微信客户端点开图片让它下载完整 `.dat`,再重新执行 `wx extract`。

支持的解码档位:
- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推)
Expand Down
5 changes: 4 additions & 1 deletion SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,11 +280,14 @@ wx attachments "AI群" --since 2026-04-01 --until 2026-04-15
# 2) 用 attachment_id 把单个资源解密写到指定路径
wx extract <attachment_id> -o ~/Desktop/photo.jpg
wx extract <attachment_id> -o /tmp/x.jpg --overwrite
wx extract <attachment_id> -o /tmp/raw.wxgf --raw # 保留原始 WXGF/HEVC 容器
```

`attachments` 输出每条带:`attachment_id` / `kind`(当前固定 `image`)/ `type` / `local_id` / `timestamp` / `time`,群聊里另带 `sender` 和稳定身份三件套(同上文)。命令名保留成 `attachments` 是为了后续扩到其他附件类型时不 break CLI。

`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际识别出的图片格式:jpg / png / gif / webp / hevc 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。
`extract` 报告里带:`md5` / `dat_path` / `dat_size` / `output` / `output_size` / `format`(实际写出的图片格式:jpg / png / gif / webp 等)/ `decoder`(实际选用的解码器:`legacy_xor` / `v1_aes` / `v2`)。

微信 4 图片可能解码成 `WXGF/WXAM` 容器(头部 `wxgf`,内部是 HEVC)。默认 `wx extract` 会自动提取 WXGF 里的最大 HEVC partition 并用 `ffmpeg` 转 JPG;报告会带 `source_format: hevc`、`source_size`、`transcoder` 和 `wxgf_partition_*`。如果只需要原始容器,传 `--raw`。如果本地只拿到 `_t.dat` 缩略图,输出会很小且文字图片不可读;让用户先在微信客户端点开图片,等完整 `.dat` 下载到本地后再重新 `wx extract`。

支持的解码档位:
- **legacy XOR**:早期单字节 XOR,无 magic(按文件首字节探测格式自动反推)
Expand Down
1 change: 1 addition & 0 deletions src/attachment/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use anyhow::{anyhow, Result};

pub mod v1_xor;
pub mod v2;
pub mod wxgf;

/// 完整 V2 magic:`\x07\x08V2\x08\x07`
pub const V2_MAGIC: [u8; 6] = [0x07, 0x08, b'V', b'2', 0x08, 0x07];
Expand Down
199 changes: 199 additions & 0 deletions src/attachment/decoder/wxgf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
//! WeChat 4 WXGF/WXAM image container support.
//!
//! `wxgf` is not a normal image format. It is a private WeChat container whose
//! largest data partition is usually an Annex B HEVC bitstream. We keep the
//! parser tiny: find HEVC start codes after the WXGF header, validate the
//! 4-byte big-endian length immediately before the start code, then hand the
//! largest partition to ffmpeg.

use anyhow::{bail, Context, Result};
use std::path::PathBuf;
use std::process::Command;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

const WXGF_MAGIC: &[u8; 4] = b"wxgf";
const FFMPEG_ENV: &str = "WX_FFMPEG";

/// Location of one length-prefixed HEVC partition found inside a WXGF/WXAM buffer.
///
/// Values are produced by `largest_partition` and describe a byte range of the
/// container that was passed to it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct WxgfPartition {
/// Absolute byte index into the container at which the partition's start code begins.
pub offset: usize,
/// Partition byte length, including the HEVC start code at `offset`.
pub size: usize,
/// `size` divided by the total container length (share of the file taken by this partition).
pub ratio: f64,
}

/// Result of transcoding a WXGF/WXAM container to JPEG via ffmpeg.
#[derive(Debug)]
pub struct WxgfJpeg {
/// Bytes read back from the file ffmpeg wrote (non-empty on success).
pub data: Vec<u8>,
/// The partition of the source container that was handed to ffmpeg.
pub partition: WxgfPartition,
/// The ffmpeg program name or path that was executed.
pub ffmpeg: String,
}

/// Pair of scratch files used for one ffmpeg invocation.
///
/// Both files are deleted when the value goes out of scope, whether the
/// transcode succeeded or bailed out early.
struct TempPaths {
    /// File the raw HEVC bitstream is written to before ffmpeg runs.
    input: PathBuf,
    /// File ffmpeg is asked to write the JPEG to.
    output: PathBuf,
}

impl Drop for TempPaths {
    /// Best-effort cleanup: a file that was never created (or is already
    /// gone) is not an error, so removal failures are deliberately ignored.
    fn drop(&mut self) {
        for scratch in [&self.input, &self.output] {
            let _ = std::fs::remove_file(scratch);
        }
    }
}

/// Return the largest HEVC Annex B partition inside a WXGF/WXAM container.
///
/// The container must start with the `wxgf` magic; byte 4 is read as the
/// header length and the scan begins right after the header. Every Annex B
/// start code found is treated as a partition candidate whose byte length
/// (start code included) is the 4-byte big-endian integer stored immediately
/// before it. A candidate is accepted only when that length
///
/// * is larger than the start code itself (a partition must carry payload), and
/// * keeps the partition inside `data`.
///
/// 4-byte start codes (`00 00 00 01`) are tried first; 3-byte start codes
/// (`00 00 01`) are only tried when the first pass finds nothing. When
/// several partitions share the largest size, the last one found wins.
///
/// # Errors
///
/// Fails when `data` is shorter than 15 bytes, lacks the `wxgf` magic, declares
/// a header length that is not smaller than `data.len()`, or contains no
/// acceptable partition.
pub fn largest_partition(data: &[u8]) -> Result<WxgfPartition> {
    if data.len() < 15 || &data[..4] != WXGF_MAGIC {
        bail!("invalid WXGF image container");
    }

    let header_len = data[4] as usize;
    if header_len >= data.len() {
        bail!("invalid WXGF header length {}", header_len);
    }

    for pattern in [&[0x00, 0x00, 0x00, 0x01][..], &[0x00, 0x00, 0x01][..]] {
        let mut best: Option<WxgfPartition> = None;
        // Absolute index at which the next start-code search begins.
        let mut cursor = header_len;

        while cursor < data.len() {
            let Some(idx) = find_subslice(&data[cursor..], pattern) else {
                break;
            };
            let start = cursor + idx;
            if start < 4 {
                // No room for a length prefix in front of this start code.
                cursor = start + 1;
                continue;
            }

            let size = u32::from_be_bytes([
                data[start - 4],
                data[start - 3],
                data[start - 2],
                data[start - 1],
            ]) as usize;
            let fits = start
                .checked_add(size)
                .is_some_and(|end| end <= data.len());

            // `size` counts the start code too, so anything not larger than
            // the start code is a false positive rather than a real partition.
            if size > pattern.len() && fits {
                // `>=` keeps the later partition on ties.
                if best.map_or(true, |current| size >= current.size) {
                    best = Some(WxgfPartition {
                        offset: start,
                        size,
                        ratio: size as f64 / data.len() as f64,
                    });
                }
                // Skip the whole partition; its payload may contain start codes.
                cursor = start + size;
            } else {
                cursor = start + 1;
            }
        }

        if let Some(found) = best {
            return Ok(found);
        }
    }

    bail!("WXGF image has no valid HEVC partition")
}

/// Convert a WXGF/WXAM image to JPEG through ffmpeg.
///
/// The largest HEVC partition of `data` is written to a scratch file, ffmpeg
/// decodes its first frame to an MJPEG-encoded `.jpg` scratch file, and the
/// resulting bytes are returned together with the partition that was used and
/// the ffmpeg program that was run. Both scratch files are removed when the
/// function returns, on success and on error.
///
/// The ffmpeg path is resolved from `WX_FFMPEG`, then falls back to `ffmpeg` in
/// PATH. A `WX_FFMPEG` that is unset, not valid Unicode, or blank is treated as
/// absent. This avoids adding Python or native HEVC decoder dependencies.
///
/// # Errors
///
/// Fails when `data` holds no valid partition, the scratch input cannot be
/// created, ffmpeg cannot be started or exits unsuccessfully (its stderr is
/// included, truncated to 800 characters), or the output file is unreadable
/// or empty.
pub fn transcode_to_jpeg(data: &[u8]) -> Result<WxgfJpeg> {
    let partition = largest_partition(data)?;
    let hevc = &data[partition.offset..partition.offset + partition.size];

    // An exported-but-empty variable would otherwise become `Command::new("")`
    // and surface as a misleading "ffmpeg not installed" failure.
    let ffmpeg = std::env::var(FFMPEG_ENV)
        .ok()
        .filter(|configured| !configured.trim().is_empty())
        .unwrap_or_else(|| "ffmpeg".to_string());
    let paths = temp_paths();

    // `create_new` refuses to follow or truncate anything that already sits at
    // the scratch path in the shared temp directory. The handle is dropped at
    // the end of the closure, so the file is closed before ffmpeg opens it.
    std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(&paths.input)
        .and_then(|mut file| std::io::Write::write_all(&mut file, hevc))
        .with_context(|| format!("写出 WXGF/HEVC 临时输入失败:{}", paths.input.display()))?;

    let output = Command::new(&ffmpeg)
        .arg("-y")
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        // The input is a bare Annex B stream, so the demuxer is forced.
        .arg("-f")
        .arg("hevc")
        .arg("-i")
        .arg(&paths.input)
        // Only the first decoded frame is wanted.
        .arg("-vframes")
        .arg("1")
        .arg("-c:v")
        .arg("mjpeg")
        .arg("-q:v")
        .arg("4")
        .arg(&paths.output)
        .output()
        .with_context(|| {
            format!(
                "启动 ffmpeg 失败;请安装 ffmpeg 或用 {FFMPEG_ENV} 指定路径,或用 wx extract --raw 导出原始 WXGF"
            )
        })?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "ffmpeg 转码 WXGF/HEVC 失败:{}",
            stderr.trim().chars().take(800).collect::<String>()
        );
    }

    let data = std::fs::read(&paths.output)
        .with_context(|| format!("读取 ffmpeg 输出失败:{}", paths.output.display()))?;
    if data.is_empty() {
        bail!("ffmpeg 转码 WXGF/HEVC 成功但没有输出 JPEG 数据");
    }

    Ok(WxgfJpeg {
        data,
        partition,
        ffmpeg,
    })
}

/// Build a fresh pair of scratch file paths inside the system temp directory.
///
/// The shared file stem combines the process id, the current wall-clock time
/// in nanoseconds (0 if the clock reads earlier than the Unix epoch) and a
/// per-process counter, so repeated calls within one process never collide.
/// Nothing is created on disk here; only the paths are computed.
fn temp_paths() -> TempPaths {
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    let stem = format!("wx-cli-wxgf-{}-{}-{}", pid, nanos, seq);

    let base = std::env::temp_dir();
    let input = base.join(format!("{}.hevc", stem));
    let output = base.join(format!("{}.jpg", stem));
    TempPaths { input, output }
}

/// Locate the first occurrence of `needle` inside `haystack`.
///
/// Returns the index of the first matching byte, or `None` when there is no
/// match, when `needle` is empty, or when `needle` is longer than `haystack`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // Highest index at which a full-length match could still begin.
    let last_start = match haystack.len().checked_sub(needle.len()) {
        Some(limit) if !needle.is_empty() => limit,
        _ => return None,
    };
    (0..=last_start).find(|&start| haystack[start..].starts_with(needle))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Append one partition (4-byte big-endian length, 4-byte start code,
    /// payload) to `buf` and return the absolute offset of its start code.
    fn push_partition(buf: &mut Vec<u8>, payload: &[u8]) -> usize {
        const START_CODE: [u8; 4] = [0, 0, 0, 1];
        let size = (START_CODE.len() + payload.len()) as u32;
        buf.extend_from_slice(&size.to_be_bytes());
        let offset = buf.len();
        buf.extend_from_slice(&START_CODE);
        buf.extend_from_slice(payload);
        offset
    }

    #[test]
    fn finds_largest_partition() {
        // Magic + header-length byte + padding = a 19-byte header.
        let mut container = Vec::from(&b"wxgf"[..]);
        container.push(19); // header length
        container.resize(19, 0);

        // An 8-byte partition followed by a 12-byte one; the latter must win.
        push_partition(&mut container, &[1, 2, 3, 4]);
        let expected_offset = push_partition(&mut container, &[5, 6, 7, 8, 9, 10, 11, 12]);

        let found = largest_partition(&container).unwrap();
        assert_eq!(found.offset, expected_offset);
        assert_eq!(found.size, 12);
    }

    #[test]
    fn rejects_non_wxgf() {
        let outcome = largest_partition(b"not wxgf");
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("WXGF"));
    }
}
2 changes: 2 additions & 0 deletions src/cli/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ pub fn cmd_extract(
attachment_id: String,
output: String,
overwrite: bool,
raw: bool,
json: bool,
) -> Result<()> {
let req = Request::Extract {
attachment_id,
output,
overwrite,
raw,
};
let resp = transport::send(req)?;
print_value(&resp.data, &resolve(json))
Expand Down
6 changes: 5 additions & 1 deletion src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ enum Commands {
/// 目标已存在时覆盖
#[arg(long)]
overwrite: bool,
/// 原样导出解码后的附件数据;WXGF/HEVC 图片不转 JPG
#[arg(long)]
raw: bool,
/// 输出 JSON(默认 YAML)
#[arg(long)]
json: bool,
Expand Down Expand Up @@ -518,8 +521,9 @@ fn dispatch(cli: Cli) -> Result<()> {
attachment_id,
output,
overwrite,
raw,
json,
} => extract::cmd_extract(attachment_id, output, overwrite, json),
} => extract::cmd_extract(attachment_id, output, overwrite, raw, json),
Commands::Daemon { cmd } => daemon_cmd::cmd_daemon(cmd),
}
}
51 changes: 45 additions & 6 deletions src/daemon/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4483,6 +4483,7 @@ pub async fn q_extract(
attachment_id: &str,
output: &str,
overwrite: bool,
raw: bool,
) -> Result<Value> {
use crate::attachment::{
attachment_id::AttachmentId,
Expand Down Expand Up @@ -4573,25 +4574,63 @@ pub async fn q_extract(
};

let decoded = decoder::dispatch(&dat_bytes, v2_key)?;
let source_format = decoded.format;
let source_size = decoded.data.len();
let mut output_format = source_format.to_string();
let mut decoder_name = decoded.decoder.to_string();
let mut output_data = decoded.data;
let mut wxgf_partition_offset: Option<usize> = None;
let mut wxgf_partition_size: Option<usize> = None;
let mut wxgf_partition_ratio: Option<f64> = None;
let mut transcoder: Option<String> = None;

if source_format == "hevc" && !raw {
let jpg = decoder::wxgf::transcode_to_jpeg(&output_data)
.context("WXGF/HEVC 图片转 JPG 失败;可安装 ffmpeg 或用 wx extract --raw 导出原始 WXGF")?;
wxgf_partition_offset = Some(jpg.partition.offset);
wxgf_partition_size = Some(jpg.partition.size);
wxgf_partition_ratio = Some(jpg.partition.ratio);
transcoder = Some(format!("ffmpeg:{}", jpg.ffmpeg));
output_data = jpg.data;
output_format = "jpg".to_string();
decoder_name.push_str("+wxgf_ffmpeg");
}

// 写盘
std::fs::write(&output_path2, &decoded.data)
std::fs::write(&output_path2, &output_data)
.with_context(|| format!("写出文件失败:{}", output_path2.display()))?;

// 注意:不要在这里塞 `ok: true`。dispatch 会用 Response::ok(v) 包一层,
// Response 的 `data: Value` 字段是 #[serde(flatten)] 写出的,本 payload
// 的 `ok` 会和 Response 自带的 `ok` 在线上拼成两个同名 key,CLI 反序列化时
// serde_json 直接报 "duplicate field",业务请求看上去像 daemon 解析失败。
Ok(json!({
let mut report = json!({
"kind": id_for_task.kind.as_str(),
"md5": resolved.md5,
"dat_path": resolved.dat_path.display().to_string(),
"dat_size": resolved.size,
"output": output_path2.display().to_string(),
"output_size": decoded.data.len(),
"format": decoded.format,
"decoder": decoded.decoder,
}))
"output_size": output_data.len(),
"format": output_format,
"decoder": decoder_name,
});
if source_format != report["format"].as_str().unwrap_or_default() {
report["source_format"] = json!(source_format);
report["source_size"] = json!(source_size);
}
if let Some(transcoder) = transcoder {
report["transcoder"] = json!(transcoder);
}
if let Some(offset) = wxgf_partition_offset {
report["wxgf_partition_offset"] = json!(offset);
}
if let Some(size) = wxgf_partition_size {
report["wxgf_partition_size"] = json!(size);
}
if let Some(ratio) = wxgf_partition_ratio {
report["wxgf_partition_ratio"] = json!(ratio);
}
Ok(report)
})
.await??;

Expand Down
3 changes: 2 additions & 1 deletion src/daemon/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,8 @@ async fn dispatch(req: Request, db: &DbCache, names: &tokio::sync::RwLock<Arc<Na
attachment_id,
output,
overwrite,
} => match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite).await {
raw,
} => match query::q_extract(db, &names_arc, &attachment_id, &output, overwrite, raw).await {
Ok(v) => Response::ok(v),
Err(e) => Response::err(e.to_string()),
},
Expand Down
3 changes: 3 additions & 0 deletions src/ipc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ pub enum Request {
/// 已存在时是否覆盖
#[serde(default)]
overwrite: bool,
/// 原样导出解码后的附件数据;图片为 WXGF/HEVC 时不调用 ffmpeg 转 JPG
#[serde(default, skip_serializing_if = "is_false")]
raw: bool,
},
}

Expand Down