Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions src/formats/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@
Ok(gt)
}

pub fn save<'t, W: Write, T: 't, X: Taxonomy<'t, T>>(

Check warning on line 347 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
writer: W,
taxonomy: &'t X,
format: JsonFormat,
Expand All @@ -369,7 +369,79 @@
Ok(())
}

/// Memory-efficient streaming tree export that writes JSON incrementally
/// instead of building the entire tree structure in memory first.
pub fn save_tree_streaming<'t, W: Write, T: 't, X: Taxonomy<'t, T>>(

Check warning on line 374 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
mut writer: W,
taxonomy: &'t X,
root_node: Option<T>,
) -> TaxonomyResult<()>
where
T: Clone + Debug + Display + Eq + Hash + PartialEq,
{
let tax_id = root_node
.or_else(|| {
if taxonomy.is_empty() {
None
} else {
Some(taxonomy.root())
}
})
.ok_or(Error::new(ErrorKind::InvalidTaxonomy(
"Taxonomy must have a root node.".to_string(),
)))?;

write_node_streaming(taxonomy, tax_id, &mut writer)?;
writer.flush()?;
Ok(())
}

fn write_node_streaming<'t, W: Write, T: 't>(

Check warning on line 399 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
tax: &'t impl Taxonomy<'t, T>,
tax_id: T,
writer: &mut W,
) -> TaxonomyResult<()>
where
T: Clone + Debug + Display + Eq + Hash + PartialEq,
{
write!(writer, "{{")?;

// Write id
write!(writer, "\"id\":")?;
to_writer(&mut *writer, &tax_id.to_string())?;

// Write name
write!(writer, ",\"name\":")?;
to_writer(&mut *writer, &tax.name(tax_id.clone())?)?;

// Write rank
write!(writer, ",\"rank\":")?;
to_writer(&mut *writer, tax.rank(tax_id.clone())?.to_ncbi_rank())?;

// Write extra data fields
let data = tax.data(tax_id.clone())?;
for (key, value) in data.iter() {
write!(writer, ",{}", serde_json::to_string(key)?)?;
write!(writer, ":")?;
to_writer(&mut *writer, value)?;
}

// Write children (always include, even if empty, to match regular format)
let children = tax.children(tax_id)?;
write!(writer, ",\"children\":[")?;
for (i, child) in children.into_iter().enumerate() {
if i > 0 {
write!(writer, ",")?;
}
write_node_streaming(tax, child, writer)?;
}
write!(writer, "]")?;

write!(writer, "}}")?;
Ok(())
}

fn serialize_as_tree<'t, T: 't>(

Check warning on line 444 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
taxonomy: &'t impl Taxonomy<'t, T>,
root_node: Option<T>,
) -> TaxonomyResult<Value>
Expand All @@ -380,7 +452,7 @@
"Taxonomy must have a root node.".to_string(),
)))?;

fn inner<'t, T: 't>(tax: &'t impl Taxonomy<'t, T>, tax_id: T) -> TaxonomyResult<TaxNodeTree>

Check warning on line 455 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
where
T: Clone + Debug + Display + Eq + Hash + PartialEq,
{
Expand All @@ -401,7 +473,7 @@
Ok(to_value(inner(taxonomy, tax_id)?)?)
}

fn serialize_as_node_links<'t, T: 't>(

Check warning on line 476 in src/formats/json.rs

View workflow job for this annotation

GitHub Actions / clippy

bound is defined in more than one place
tax: &'t impl Taxonomy<'t, T>,
root_node: Option<T>,
) -> TaxonomyResult<Value>
Expand Down Expand Up @@ -720,4 +792,116 @@
let example = r#"{"id": "1", "rank": null, "name": ""}"#;
assert!(load(Cursor::new(example), None).is_ok());
}

#[test]
fn streaming_tree_matches_regular_tree() {
// Create a taxonomy with multiple nodes and extra data
let example = r#"{
"id": "1",
"name": "root",
"rank": "no rank",
"custom_field": "value1",
"children": [
{
"id": "2",
"name": "Bacteria",
"rank": "superkingdom",
"genetic_code_id": "11",
"children": [
{
"id": "562",
"name": "Escherichia coli",
"rank": "species",
"genetic_code_id": "11",
"embl_code": "EC",
"division_id": "0",
"name_common_name": "E. coli"
}
]
}
]
}"#;

let tax = load(Cursor::new(example), None).unwrap();

// Save using regular method
let mut regular_output = Vec::new();
save::<_, &str, _>(&mut regular_output, &tax, JsonFormat::Tree, None).unwrap();

// Save using streaming method
let mut streaming_output = Vec::new();
save_tree_streaming::<_, &str, _>(&mut streaming_output, &tax, None).unwrap();

// Parse both outputs
let regular_json: Value = serde_json::from_slice(&regular_output).unwrap();
let streaming_json: Value = serde_json::from_slice(&streaming_output).unwrap();

// They should be equivalent
assert_eq!(regular_json, streaming_json);

// Verify we can reload from streaming output
let tax2 = load(Cursor::new(&streaming_output[..]), None).unwrap();
assert_eq!(Taxonomy::<&str>::len(&tax2), 3);
assert_eq!(
Taxonomy::<&str>::name(&tax2, "562").unwrap(),
"Escherichia coli"
);

// Verify extra data is preserved
let data = Taxonomy::<&str>::data(&tax2, "562").unwrap();
assert_eq!(
data.get("genetic_code_id").and_then(|v| v.as_str()),
Some("11")
);
assert_eq!(data.get("embl_code").and_then(|v| v.as_str()), Some("EC"));
assert_eq!(
data.get("name_common_name").and_then(|v| v.as_str()),
Some("E. coli")
);
}

#[test]
fn streaming_handles_large_data_fields() {
// Test that streaming handles nodes with lots of extra fields (like NCBI data)
let example = r#"{
"id": "562",
"name": "Escherichia coli",
"rank": "species",
"genetic_code_id": "11",
"division_id": "0",
"inherited_div_flag": "1",
"mitochondrial_genetic_code_id": "0",
"inherited_MGC_flag": "1",
"GenBank_hidden_flag": "0",
"hidden_subtree_root_flag": "0",
"comments": "",
"name_scientific_name": "Escherichia coli",
"name_common_name": "E. coli",
"name_synonym": "Bacterium coli",
"name_authority": "Escherichia coli (Migula 1895) Castellani and Chalmers 1919"
}"#;

let tax = load(Cursor::new(example), None).unwrap();

let mut streaming_output = Vec::new();
save_tree_streaming::<_, &str, _>(&mut streaming_output, &tax, None).unwrap();

// Reload and verify all fields preserved
let tax2 = load(Cursor::new(&streaming_output[..]), None).unwrap();
let data = Taxonomy::<&str>::data(&tax2, "562").unwrap();

assert_eq!(
data.get("genetic_code_id").and_then(|v| v.as_str()),
Some("11")
);
assert_eq!(
data.get("name_common_name").and_then(|v| v.as_str()),
Some("E. coli")
);
assert_eq!(
data.get("name_synonym").and_then(|v| v.as_str()),
Some("Bacterium coli")
);
assert_eq!(data.get("comments").and_then(|v| v.as_str()), Some(""));
}
}
Loading
Loading