diff --git a/Project.toml b/Project.toml index a82de7a..e58321a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,14 +1,16 @@ name = "NPZ" uuid = "15e1cf62-19b3-5cfa-8e77-841668bca605" -version = "0.4.3" +version = "0.4.4" [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] FileIO = "1" ZipFile = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10" +ZipArchives = "2" julia = "1.0" [extras] diff --git a/src/NPZ.jl b/src/NPZ.jl index adbf1bb..56216ed 100644 --- a/src/NPZ.jl +++ b/src/NPZ.jl @@ -364,92 +364,6 @@ function npzwritearray(f::IO, x::Number) npzwritearray(f, reinterpret(UInt8, [x]), typeof(x), ()) end -""" - npzwrite(filename::AbstractString, x) - -Write the variable `x` to the `npy` file `filename`. -Unlike `numpy`, the extension `.npy` is not appened to `filename`. - -!!! warn "Warning" - Any existing file with the same name will be overwritten. - -# Examples - -```julia -julia> npzwrite("abc.npy", zeros(3)) - -julia> npzread("abc.npy") -3-element Array{Float64,1}: - 0.0 - 0.0 - 0.0 -``` -""" -function npzwrite(filename::AbstractString, x) - open(filename, "w") do f - npzwritearray(f, x) - end -end - -""" - npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) - npzwrite(filename::AbstractString, args...; kwargs...) - -In the first form, write the variables in `vars` to an `npz` file named `filename`. - -In the second form, collect the variables in `args` and `kwargs` and write them all -to `filename`. The variables in `args` are saved with names `arr_0`, `arr_1` -and so on, whereas the ones in `kwargs` are saved with the specified names. - -Unlike `numpy`, the extension `.npz` is not appened to `filename`. - -!!! warn "Warning" - Any existing file with the same name will be overwritten. 
- -# Examples - -```julia -julia> npzwrite("temp.npz", Dict("x" => ones(3), "y" => 3)) - -julia> npzread("temp.npz") -Dict{String,Any} with 2 entries: - "x" => [1.0, 1.0, 1.0] - "y" => 3 - -julia> npzwrite("temp.npz", ones(2,2), x = ones(3), y = 3) - -julia> npzread("temp.npz") -Dict{String,Any} with 3 entries: - "arr_0" => [1.0 1.0; 1.0 1.0] - "x" => [1.0, 1.0, 1.0] - "y" => 3 -``` -""" -function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}) - dir = ZipFile.Writer(filename) - - if length(vars) == 0 - @warn "no data to be written to $filename. It might not be possible to read the file correctly." - end - - for (k, v) in vars - f = ZipFile.addfile(dir, k * ".npy") - npzwritearray(f, v) - close(f) - end - - close(dir) -end - -function npzwrite(filename::AbstractString, args...; kwargs...) - dkwargs = Dict(string(k) => v for (k,v) in kwargs) - dargs = Dict("arr_"*string(i-1) => v for (i,v) in enumerate(args)) - - d = convert(Dict{String,Any}, merge(dargs, dkwargs)) - - npzwrite(filename, d) -end - # support for FileIO load(file::File{format"NPY"}, vars...) = npzread(filename(file), vars...) load(file::File{format"NPZ"}, vars...) = npzread(filename(file), vars...) @@ -457,4 +371,8 @@ load(file::File{format"NPZ"}, vars...) = npzread(filename(file), vars...) save(file::File{format"NPY"}, data, vars...) = npzwrite(filename(file), data, vars...) save(file::File{format"NPZ"}, data, vars...) = npzwrite(filename(file), data, vars...) 
+## Includes here
+include("write_npz.jl")
+include("read_lazy.jl")
+
 end # module
diff --git a/src/read_lazy.jl b/src/read_lazy.jl
new file mode 100644
index 0000000..ba5f727
--- /dev/null
+++ b/src/read_lazy.jl
@@ -0,0 +1,77 @@
+using ZipFile
+
+export npzread_lazy
+
+"""
+    LazyNPZ
+
+Handle to an `.npz` archive whose arrays are read on first access.
+
+Index it with an array name (`npz["x"]`) to read that array. Arrays that have been
+read are cached, so they stay available after `close(npz)`. `keys(npz)` lists the
+names of the arrays stored in the archive.
+"""
+mutable struct LazyNPZ
+    reader::ZipFile.Reader
+    entries::Dict{String,ZipFile.ReadableFile}
+    cache_f::Dict{String,Array}
+    # `close` flips this flag, which is why the struct has to be mutable.
+    closed::Bool
+
+    function LazyNPZ(reader::ZipFile.Reader, entries::Dict{String,ZipFile.ReadableFile})
+        return new(reader, entries, Dict{String,Array}(), false)
+    end
+end
+
+function Base.show(io::IO, npz::LazyNPZ)
+    # Print to `io` rather than stdout so the text goes where `show` was asked to put it.
+    print(io, "LazyNPZ(files=$(keys(npz.entries)),loaded=$(keys(npz.cache_f)))")
+    return nothing
+end
+
+"""
+    close(npz::LazyNPZ)
+
+Close the zip reader behind `npz`. Calling it again on a closed archive does nothing.
+"""
+function Base.close(npz::LazyNPZ)
+    if !npz.closed
+        close(npz.reader)
+        npz.closed = true
+    end
+    return nothing
+end
+
+"""
+    npzread_lazy(filename::AbstractString)
+
+Open the `npz` file `filename` and return a `LazyNPZ` without reading any array yet.
+"""
+function npzread_lazy(filename::AbstractString)
+    reader = ZipFile.Reader(filename)
+    entries = Dict{String,ZipFile.ReadableFile}()
+    for f in reader.files
+        entries[_maybetrimext(f.name)] = f
+    end
+    return LazyNPZ(reader, entries)
+end
+
+"""
+    getindex(npz::LazyNPZ, name::AbstractString)
+
+Return the array stored under `name`, reading it from the archive on first access.
+
+Throws a `KeyError` when the archive has no such array, and an `ErrorException` when
+the array has not been read yet and the archive is already closed.
+"""
+function Base.getindex(npz::LazyNPZ, name::AbstractString)
+    return get!(npz.cache_f, name) do
+        haskey(npz.entries, name) || throw(KeyError(name))
+        if npz.closed
+            error("Cannot read array `$name`: the archive is already closed")
+        end
+        npzreadarray(npz.entries[name])
+    end
+end
+
+Base.keys(npz::LazyNPZ) = keys(npz.entries)
diff --git a/src/write_npz.jl b/src/write_npz.jl
new file mode 100644
index 0000000..b15b070
--- /dev/null
+++ b/src/write_npz.jl
@@ -0,0 +1,85 @@
+using ZipArchives: ZipWriter, zip_newfile
+
+"""
+    npzwrite(filename::AbstractString, x)
+
+Write the variable `x` to the `npy` file `filename`.
+Unlike `numpy`, the extension `.npy` is not appended to `filename`.
+
+!!! warn "Warning"
+    Any existing file with the same name will be overwritten.
+
+# Examples
+
+```julia
+julia> npzwrite("abc.npy", zeros(3))
+
+julia> npzread("abc.npy")
+3-element Array{Float64,1}:
+ 0.0
+ 0.0
+ 0.0
+```
+"""
+function npzwrite(filename::AbstractString, x)
+    open(filename, "w") do f
+        npzwritearray(f, x)
+    end
+end
+
+"""
+    npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=3)
+    npzwrite(filename::AbstractString, args...; compress=false, compression_level=3, kwargs...)
+
+In the first form, write the variables in `vars` to an `npz` file named `filename`.
+
+In the second form, collect the variables in `args` and `kwargs` and write them all
+to `filename`. The variables in `args` are saved with names `arr_0`, `arr_1`
+and so on, whereas the ones in `kwargs` are saved with the specified names.
+
+Unlike `numpy`, the extension `.npz` is not appended to `filename`.
+
+Use `compress=true` to write the file with Zip compression, at the level specified by `compression_level`.
+
+!!! warn "Warning"
+    Any existing file with the same name will be overwritten.
+
+# Examples
+
+```julia
+julia> npzwrite("temp.npz", Dict("x" => ones(3), "y" => 3))
+
+julia> npzread("temp.npz")
+Dict{String,Any} with 2 entries:
+  "x" => [1.0, 1.0, 1.0]
+  "y" => 3
+
+julia> npzwrite("temp.npz", ones(2,2), x = ones(3), y = 3)
+
+julia> npzread("temp.npz")
+Dict{String,Any} with 3 entries:
+  "arr_0" => [1.0 1.0; 1.0 1.0]
+  "x" => [1.0, 1.0, 1.0]
+  "y" => 3
+```
+"""
+function npzwrite(filename::AbstractString, vars::Dict{<:AbstractString}; compress=false, compression_level=3)
+    ZipWriter(filename) do outf
+        if isempty(vars)
+            @warn "no data to be written to $filename. It might not be possible to read the file correctly."
+        end
+        for (name, v) in vars
+            # create a new entry in the archive, then write the array into it
+            zip_newfile(outf, name * ".npy"; compress=compress, compression_level=compression_level)
+            npzwritearray(outf, v)
+        end
+    end
+end
+
+function npzwrite(filename::AbstractString, args...; compress=false, compression_level=3, kwargs...)
+    dkwargs = Dict(string(k) => v for (k, v) in kwargs)
+    dargs = Dict("arr_" * string(i - 1) => v for (i, v) in enumerate(args))
+    d = convert(Dict{String,Any}, merge(dargs, dkwargs))
+    # Same `compression_level` default as the `Dict` method, as the docstring states.
+    return npzwrite(filename, d; compress=compress, compression_level=compression_level)
+end
diff --git a/test/data.npz b/test/data.npz
new file mode 100644
index 0000000..7b934b1
Binary files /dev/null and b/test/data.npz differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 8e0c1ee..bde1581 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -132,4 +132,24 @@ end
     @test eltype(hdr["x"]) == eltype(npzread(f, ["x"])["x"])
     @test size(hdr["y"]) == ()
     @test eltype(hdr["y"]) == eltype(npzread(f, ["y"])["y"])
+end
+
+
+@testset "read_npz_lazy" begin
+    ark = NPZ.npzread_lazy(joinpath(@__DIR__, "data.npz"))
+
+    arr_r = ark["range"]
+    @test arr_r == collect(Int64, 0:99)
+
+    arr_l = ark["linspace"]
+    @test length(arr_l) == 50
+    @test arr_l[1] == 0.0
+    @test arr_l[50] == 1.0
+
+    @test_throws KeyError ark["not_in_archive"]
+
+    # Closing is idempotent, and arrays that were already read stay available.
+    close(ark)
+    close(ark)
+    @test ark["range"] === arr_r
 end
\ No newline at end of file