From eeccc7b921347b0c662a940cb43af3902094c72c Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Thu, 19 Sep 2024 15:43:38 +0200 Subject: [PATCH 1/6] use ZipArchives to write to a .npz file --- Project.toml | 4 ++- src/NPZ.jl | 91 +++--------------------------------------------- src/write_npz.jl | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 87 deletions(-) create mode 100644 src/write_npz.jl diff --git a/Project.toml b/Project.toml index a82de7a..e58321a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,14 +1,16 @@ name = "NPZ" uuid = "15e1cf62-19b3-5cfa-8e77-841668bca605" -version = "0.4.3" +version = "0.4.4" [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] FileIO = "1" ZipFile = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10" +ZipArchives = "2" julia = "1.0" [extras] diff --git a/src/NPZ.jl b/src/NPZ.jl index adbf1bb..ff398ca 100644 --- a/src/NPZ.jl +++ b/src/NPZ.jl @@ -5,6 +5,8 @@ module NPZ using ZipFile, FileIO import Base.CodeUnits +using ZipArchives: ZipWriter, zip_newfile + export npzread, npzwrite @@ -55,6 +57,9 @@ function writecheck(io::IO, x::Any) n == sizeof(x) || error("short write") # sizeof is size in bytes end +## Includes here +include("write_npz.jl") + # Endianness only pertains to multi-byte things writele(ios::IO, x::AbstractVector{UInt8}) = writecheck(ios, x) writele(ios::IO, x::AbstractVector{CodeUnits{UInt8, <:Any}}) = writecheck(ios, x) @@ -364,92 +369,6 @@ function npzwritearray(f::IO, x::Number) npzwritearray(f, reinterpret(UInt8, [x]), typeof(x), ()) end -""" - npzwrite(filename::AbstractString, x) - -Write the variable `x` to the `npy` file `filename`. -Unlike `numpy`, the extension `.npy` is not appened to `filename`. - -!!! warn "Warning" - Any existing file with the same name will be overwritten. - -# Examples - -```julia -julia> npzwrite("abc.npy", zeros(3)) - -julia> npzread("abc.npy") -3-element Array{Float64,1}: - 0.0 - 0.0 - 0.0 -``` -""" -function npzwrite(filename::AbstractString, x) - open(filename, "w") do f - npzwritearray(f, x) - end -end - -""" - npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) - npzwrite(filename::AbstractString, args...; kwargs...) - -In the first form, write the variables in `vars` to an `npz` file named `filename`. - -In the second form, collect the variables in `args` and `kwargs` and write them all -to `filename`. The variables in `args` are saved with names `arr_0`, `arr_1` -and so on, whereas the ones in `kwargs` are saved with the specified names. - -Unlike `numpy`, the extension `.npz` is not appened to `filename`. - -!!! warn "Warning" - Any existing file with the same name will be overwritten. - -# Examples - -```julia -julia> npzwrite("temp.npz", Dict("x" => ones(3), "y" => 3)) - -julia> npzread("temp.npz") -Dict{String,Any} with 2 entries: - "x" => [1.0, 1.0, 1.0] - "y" => 3 - -julia> npzwrite("temp.npz", ones(2,2), x = ones(3), y = 3) - -julia> npzread("temp.npz") -Dict{String,Any} with 3 entries: - "arr_0" => [1.0 1.0; 1.0 1.0] - "x" => [1.0, 1.0, 1.0] - "y" => 3 -``` -""" -function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) - dir = ZipFile.Writer(filename) - - if length(vars) == 0 - @warn "no data to be written to $filename. It might not be possible to read the file correctly." - end - - for (k, v) in vars - f = ZipFile.addfile(dir, k * ".npy") - npzwritearray(f, v) - close(f) - end - - close(dir) -end - -function npzwrite(filename::AbstractString, args...; kwargs...) - dkwargs = Dict(string(k) => v for (k,v) in kwargs) - dargs = Dict("arr_"*string(i-1) => v for (i,v) in enumerate(args)) - - d = convert(Dict{String,Any}, merge(dargs, dkwargs)) - - npzwrite(filename, d) -end - # support for FileIO load(file::File{format"NPY"}, vars...) = npzread(filename(file), vars...) load(file::File{format"NPZ"}, vars...) = npzread(filename(file), vars...) diff --git a/src/write_npz.jl b/src/write_npz.jl new file mode 100644 index 0000000..02cd655 --- /dev/null +++ b/src/write_npz.jl @@ -0,0 +1,88 @@ + +""" + npzwrite(filename::AbstractString, x) + +Write the variable `x` to the `npy` file `filename`. +Unlike `numpy`, the extension `.npy` is not appened to `filename`. + +!!! warn "Warning" + Any existing file with the same name will be overwritten. + +# Examples + +```julia +julia> npzwrite("abc.npy", zeros(3)) + +julia> npzread("abc.npy") +3-element Array{Float64,1}: + 0.0 + 0.0 + 0.0 +``` +""" +function npzwrite(filename::AbstractString, x) + open(filename, "w") do f + npzwritearray(f, x) + end +end + +""" + npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) + npzwrite(filename::AbstractString, args...; kwargs...) + +In the first form, write the variables in `vars` to an `npz` file named `filename`. + +In the second form, collect the variables in `args` and `kwargs` and write them all +to `filename`. The variables in `args` are saved with names `arr_0`, `arr_1` +and so on, whereas the ones in `kwargs` are saved with the specified names. + +Unlike `numpy`, the extension `.npz` is not appened to `filename`. + +!!! warn "Warning" + Any existing file with the same name will be overwritten. + +# Examples + +```julia +julia> npzwrite("temp.npz", Dict("x" => ones(3), "y" => 3)) + +julia> npzread("temp.npz") +Dict{String,Any} with 2 entries: + "x" => [1.0, 1.0, 1.0] + "y" => 3 + +julia> npzwrite("temp.npz", ones(2,2), x = ones(3), y = 3) + +julia> npzread("temp.npz") +Dict{String,Any} with 3 entries: + "arr_0" => [1.0 1.0; 1.0 1.0] + "x" => [1.0, 1.0, 1.0] + "y" => 3 +``` +""" +function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=4) + ZipWriter(filename) do w + if length(vars) == 0 + @warn "no data to be written to $filename. It might not be possible to read the file correctly." + end + for (name,v) in vars + ## write array into buffer, then get the data + b =IOBuffer() + npzwritearray(b,v) + arr = take!(b) + zip_newfile(w, name*".npy",compress=compress, compression_level=compression_level) + write(w, arr) + end + + end +end + +function npzwrite(filename::AbstractString, args...; compress=false, compression_level=4, kwargs...) + + dkwargs = Dict(string(k) => v for (k,v) in kwargs) + dargs = Dict("arr_"*string(i-1) => v for (i,v) in enumerate(args)) + + d = convert(Dict{String,Any}, merge(dargs, dkwargs)) + + npzwrite(filename, d; compress=compress, compression_level=compression_level) +end From fc306d8ff105dc2168b14bfa4c66176a160d85f0 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Thu, 19 Sep 2024 16:04:52 +0200 Subject: [PATCH 2/6] Set default compression level --- src/write_npz.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/write_npz.jl b/src/write_npz.jl index 02cd655..9e026b4 100644 --- a/src/write_npz.jl +++ b/src/write_npz.jl @@ -60,7 +60,7 @@ Dict{String,Any} with 3 entries: "y" => 3 ``` """ -function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=4) +function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=3) ZipWriter(filename) do w if length(vars) == 0 @warn "no data to be written to $filename. It might not be possible to read the file correctly." From d97f67b53582cd6b1a1a70b9e0af5db5e06e19f7 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Tue, 25 Feb 2025 11:50:42 +0100 Subject: [PATCH 3/6] Avoid buffering, declare packages in separate file --- src/NPZ.jl | 2 -- src/write_npz.jl | 13 ++++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/NPZ.jl b/src/NPZ.jl index ff398ca..5370821 100644 --- a/src/NPZ.jl +++ b/src/NPZ.jl @@ -5,8 +5,6 @@ module NPZ using ZipFile, FileIO import Base.CodeUnits -using ZipArchives: ZipWriter, zip_newfile - export npzread, npzwrite diff --git a/src/write_npz.jl b/src/write_npz.jl index 9e026b4..f81ea41 100644 --- a/src/write_npz.jl +++ b/src/write_npz.jl @@ -1,3 +1,4 @@ +using ZipArchives: ZipWriter, zip_newfile """ npzwrite(filename::AbstractString, x) @@ -61,17 +62,15 @@ Dict{String,Any} with 3 entries: ``` """ function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=3) - ZipWriter(filename) do w + ZipWriter(filename) do outf if length(vars) == 0 @warn "no data to be written to $filename. It might not be possible to read the file correctly." end for (name,v) in vars - ## write array into buffer, then get the data - b =IOBuffer() - npzwritearray(b,v) - arr = take!(b) - zip_newfile(w, name*".npy",compress=compress, compression_level=compression_level) - write(w, arr) + # create new file + zip_newfile(outf, name*".npy",compress=compress, compression_level=compression_level) + # write the data + npzwritearray(outf, v) end end From 47b7aeeb6827621f3ca4c7eb530675168b47fe14 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Tue, 25 Feb 2025 12:06:13 +0100 Subject: [PATCH 4/6] Improve documentation --- src/write_npz.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/write_npz.jl b/src/write_npz.jl index f81ea41..b15b070 100644 --- a/src/write_npz.jl +++ b/src/write_npz.jl @@ -28,8 +28,8 @@ function npzwrite(filename::AbstractString, x) end """ - npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) - npzwrite(filename::AbstractString, args...; kwargs...) + npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=3) + npzwrite(filename::AbstractString, args...; compress=false, compression_level=3, kwargs...) In the first form, write the variables in `vars` to an `npz` file named `filename`. @@ -39,6 +39,8 @@ and so on, whereas the ones in `kwargs` are saved with the specified names. Unlike `numpy`, the extension `.npz` is not appened to `filename`. +Use `compress=true` to write the file with Zip compression, at the level specified by `compression_level`. + !!! warn "Warning" Any existing file with the same name will be overwritten. From 4ecb91a47b2c717bee8ae8a9e18edffee5ca0da7 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 19 Dec 2025 12:58:45 +0100 Subject: [PATCH 5/6] add possibility to read NPZ files lazily --- src/NPZ.jl | 7 ++--- src/read_lazy.jl | 68 +++++++++++++++++++++++++++++++++++++++++++++++ test/data.npz | Bin 0 -> 824 bytes test/runtests.jl | 14 ++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 src/read_lazy.jl create mode 100644 test/data.npz diff --git a/src/NPZ.jl b/src/NPZ.jl index 5370821..56216ed 100644 --- a/src/NPZ.jl +++ b/src/NPZ.jl @@ -55,9 +55,6 @@ function writecheck(io::IO, x::Any) n == sizeof(x) || error("short write") # sizeof is size in bytes end -## Includes here -include("write_npz.jl") - # Endianness only pertains to multi-byte things writele(ios::IO, x::AbstractVector{UInt8}) = writecheck(ios, x) writele(ios::IO, x::AbstractVector{CodeUnits{UInt8, <:Any}}) = writecheck(ios, x) @@ -374,4 +371,8 @@ load(file::File{format"NPZ"}, vars...) = npzread(filename(file), vars...) save(file::File{format"NPY"}, data, vars...) = npzwrite(filename(file), data, vars...) save(file::File{format"NPZ"}, data, vars...) = npzwrite(filename(file), data, vars...) +## Includes here +include("write_npz.jl") +include("read_lazy.jl") + end # module diff --git a/src/read_lazy.jl b/src/read_lazy.jl new file mode 100644 index 0000000..1c877f8 --- /dev/null +++ b/src/read_lazy.jl @@ -0,0 +1,68 @@ +using ZipFile +using NPZ +import Base: size, eltype, getindex, close + +export npzread_lazy + +""" + LazyNPZ + +Rappresenta un archivio NPZ aperto in modalità lazy. +""" +struct LazyNPZ + reader::ZipFile.Reader + entries::Dict{String,ZipFile.ReadableFile} + cache_f::Dict{String, Array} + closed::Bool + + function LazyNPZ(reader::ZipFile.Reader, entries::Dict{String,ZipFile.ReadableFile}) + new(reader, entries, Dict{String,Array}(), false) + end +end + + + +function Base.show(io::IO, npz::LazyNPZ) + println("LazyNPZ(files=$(keys(npz.entries)),loaded=$(keys(npz.cache_f)))") +end + +function close(npz::LazyNPZ) + if !npz.closed + close(npz.reader) + npz.closed = true + end +end + +""" + npzread_lazy(filename) + +Apre un file NPZ senza caricare i dati. +""" +function npzread_lazy(filename::AbstractString) + reader = ZipFile.Reader(filename) + entries = Dict{String,ZipFile.ReadableFile}() + + for f in reader.files + name = _maybetrimext(f.name) + entries[name] = f + end + + LazyNPZ(reader, entries) +end + +function Base.getindex(npz::LazyNPZ, name::AbstractString) + if !in(name,keys(npz.cache_f)) + if npz.closed + error("Key `$name` not found. File is closed so no reads are possible") + end + if !in(name, keys(npz.entries)) + ("Array `$name` not found inside the archive") + end + f = npz.entries[name] + arr = npzreadarray(f) + npz.cache_f[name] = arr + end + npz.cache_f[name] +end + +Base.keys(npz::LazyNPZ) = keys(npz.entries) diff --git a/test/data.npz b/test/data.npz new file mode 100644 index 0000000000000000000000000000000000000000..7b934b1da29ad3f3144cd848e24a28190f80f660 GIT binary patch literal 824 zcmWIWW@gc4U|`??Vnv4PvhCIXp@4@$gdrz0uecyFIaM#Oppub6fI)x>s0;`!86ouS zH{!3P1x}s_I2N#W-lV8GF-!86F36jlws76z1@orGhXhTTAwEBT$|NqKa_=*mepdqn z^v`+dc!X{bx?o^>;o_CR6`>091%?ids;mEfV)tKu!}*2kx9hk1|LzjlD0k|9692PP z@0+^bZ=b*9k)`L+&jKO;I{r2ZoRsZ(voy$3c^=kP^xivMv zQdj)b{Ja3EfPBzo$yKG&4@~)`d{w6>FG^^r@o#k(4FJrIQc(jY%^#8uS8Ygx< zU=OI4yOF-;;rqY4J}6$<=l(6>C4c{dZ+6?Bvq%46$@(w+`$^PRtJ_gy*?6?4|#!t37mTHn1}UpalCD#)1#9VCMt|dr@LuIy}r5Fhj!nKZIhKd*-?y zQy_!uMaPg0+TTuXJ;1v6Rl-q0Zm|h{tfJC=25!R2X~%vf{o$&=F(aq`z~4=iJ{Ro! zyfkLxn@e_E{3AEdS*6RXn0!8PWlPPP+wZsij9Piy^mV!a4E?^PEhY2Ln0repiCiod z4SVGMXLnd{%#DV;oFi&EZv0gnS^x9h-XLqeL3sBD>FNvZdvCP=wPy_d-P{^Ak# zHIMiMycwBvnNgE0C^>?XCJ!`-KvV-bi6SRpbe-T(2dV@GFei%6a-d##90Yi?vVmlo MfUpoqZv^K70G>Nmod5s; literal 0 HcmV?d00001 diff --git a/test/runtests.jl b/test/runtests.jl index 8e0c1ee..bde1581 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -132,4 +132,18 @@ end @test eltype(hdr["x"]) == eltype(npzread(f, ["x"])["x"]) @test size(hdr["y"]) == () @test eltype(hdr["y"]) == eltype(npzread(f, ["y"])["y"]) +end + + +@testset "read_npz_lazy" begin + + ark = NPZ.npzread_lazy("data.npz") + + arr_r = ark["range"] + @test arr_r == collect(Int64, 0:99) + + arr_l = ark["linspace"] + @test length(arr_l) == 50 + @test arr_l[1] == 0.0 + @test arr_l[50] == 1.0 end \ No newline at end of file From 37dbb9bef20be1eaf817c055cd2b09fdbcb115f4 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Sat, 3 Jan 2026 17:01:06 +0100 Subject: [PATCH 6/6] Fix descriptions --- src/read_lazy.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/read_lazy.jl b/src/read_lazy.jl index 1c877f8..ba5f727 100644 --- a/src/read_lazy.jl +++ b/src/read_lazy.jl @@ -7,7 +7,7 @@ export npzread_lazy """ LazyNPZ -Rappresenta un archivio NPZ aperto in modalità lazy. +Struct holding the data of a lazily read .npz file """ struct LazyNPZ reader::ZipFile.Reader @@ -21,7 +21,6 @@ struct LazyNPZ end - function Base.show(io::IO, npz::LazyNPZ) println("LazyNPZ(files=$(keys(npz.entries)),loaded=$(keys(npz.cache_f)))") end @@ -36,7 +35,7 @@ end """ npzread_lazy(filename) -Apre un file NPZ senza caricare i dati. +Read an npz file lazily """ function npzread_lazy(filename::AbstractString) reader = ZipFile.Reader(filename)