diff --git a/docs/make.jl b/docs/make.jl index 10af5d0..61bdf54 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,14 +1,14 @@ using Documenter, FeatherLib makedocs( - modules = [FeatherLib], - sitename = "FeatherLib.jl", + modules=[FeatherLib], + sitename="FeatherLib.jl", analytics="UA-132838790-1", - pages = [ + pages=[ "Introduction" => "index.md" ] ) deploydocs( - repo = "github.com/queryverse/FeatherLib.jl.git" + repo="github.com/queryverse/FeatherLib.jl.git" ) diff --git a/src/loadfile.jl b/src/loadfile.jl index 4192517..1c24dbb 100644 --- a/src/loadfile.jl +++ b/src/loadfile.jl @@ -6,7 +6,7 @@ function validatefile(filename::AbstractString, data::AbstractVector{UInt8}) throw(ArgumentError("'$file' is not in feather format: total length of file: $(length(data))")) end header = data[1:4] - footer = data[(end-3):end] + footer = data[(end - 3):end] if header ≠ FEATHER_MAGIC_BYTES || footer ≠ FEATHER_MAGIC_BYTES throw(ArgumentError(string("'$filename' is not in feather format: header = $header, ", "footer = $footer."))) @@ -21,15 +21,15 @@ function loadfile(filename::AbstractString; use_mmap::Bool=true) end function metalength(data::AbstractVector{UInt8}) - read(IOBuffer(data[(length(data)-7):(length(data)-4)]), Int32) + read(IOBuffer(data[(length(data) - 7):(length(data) - 4)]), Int32) end function metaposition(data::AbstractVector{UInt8}, metalen::Integer=metalength(data)) - length(data) - (metalen+7) + length(data) - (metalen + 7) end function rootposition(data::AbstractVector{UInt8}, mpos::Integer=metaposition(data)) - read(IOBuffer(data[mpos:(mpos+4)]), Int32) + read(IOBuffer(data[mpos:(mpos + 4)]), Int32) end function getctable(data::AbstractVector{UInt8}) diff --git a/src/metadata.jl b/src/metadata.jl index c244a61..32c429f 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -29,7 +29,7 @@ mutable struct CategoryMetadata ordered::Bool end -@DEFAULT CategoryMetadata ordered=false +@DEFAULT CategoryMetadata ordered = false mutable struct TimestampMetadata unit::TimeUnit @@ -43,7 +43,7 @@ mutable struct TimeMetadata unit::TimeUnit end -@UNION TypeMetadata (Nothing,CategoryMetadata,TimestampMetadata,DateMetadata,TimeMetadata) +@UNION TypeMetadata (Nothing, CategoryMetadata, TimestampMetadata, DateMetadata, TimeMetadata) mutable struct Column name::String @@ -119,7 +119,7 @@ const JULIA_TIME_DICT = Dict{Metadata.TimeUnit,DataType}( Metadata.MICROSECOND => Dates.Microsecond, Metadata.NANOSECOND => Dates.Nanosecond ) -const METADATA_TIME_DICT = Dict{DataType,Metadata.TimeUnit}(v=>k for (k,v) in JULIA_TIME_DICT) +const METADATA_TIME_DICT = Dict{DataType,Metadata.TimeUnit}(v => k for (k, v) in JULIA_TIME_DICT) isprimitivetype(t::Metadata.DType) = t ∉ NON_PRIMITIVE_TYPES @@ -171,4 +171,4 @@ function getmetadata(io::IO, ::Type{T}, A::DictEncoding) where T Metadata.CategoryMetadata(vals, true) end -getmetadata(io::IO, ::Type{Union{Missing, T}}, A::DictEncoding) where T = getmetadata(io, T, A) +getmetadata(io::IO, ::Type{Union{Missing,T}}, A::DictEncoding) where T = getmetadata(io, T, A) diff --git a/src/read.jl b/src/read.jl index f173321..a76d072 100644 --- a/src/read.jl +++ b/src/read.jl @@ -15,12 +15,12 @@ function featherread(filename::AbstractString; use_mmap=true) return ResultSet(columns, colnames, ctable.description, ctable.metadata) end -#===================================================================================================== +#= ==================================================================================================== new column construction stuff -=====================================================================================================# +==================================================================================================== =# Base.length(p::Metadata.PrimitiveArray) = p.length -startloc(p::Metadata.PrimitiveArray) = p.offset+1 +startloc(p::Metadata.PrimitiveArray) = p.offset + 1 Arrow.nullcount(p::Metadata.PrimitiveArray) = p.null_count @@ -29,7 +29,7 @@ function bitmasklength(p::Metadata.PrimitiveArray) end function offsetslength(p::Metadata.PrimitiveArray) - isprimitivetype(p.dtype) ? 0 : padding((length(p)+1)*sizeof(Int32)) + isprimitivetype(p.dtype) ? 0 : padding((length(p) + 1) * sizeof(Int32)) end valueslength(p::Metadata.PrimitiveArray) = p.total_bytes - offsetslength(p) - bitmasklength(p) diff --git a/src/write.jl b/src/write.jl index 80c0b33..19853a0 100644 --- a/src/write.jl +++ b/src/write.jl @@ -43,7 +43,7 @@ function writecontents(::Type{Metadata.PrimitiveArray}, io::IO, A::ArrowVector) a = position(io) writecontents(io, A) b = position(io) - Metadata.PrimitiveArray(A, a, b-a) + Metadata.PrimitiveArray(A, a, b - a) end @@ -55,7 +55,7 @@ end function writemetadata(io::IO, ctable::Metadata.CTable) meta = FlatBuffers.build!(ctable) - rng = (meta.head+1):length(meta.bytes) + rng = (meta.head + 1):length(meta.bytes) writepadded(io, view(meta.bytes, rng)) Int32(length(rng)) end diff --git a/test/runtests.jl b/test/runtests.jl index b4a81c3..0731974 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,21 +5,21 @@ temps = [] @testset "FeatherLib" begin -include("test_readwrite.jl") -include("test_arrow.jl") + include("test_readwrite.jl") + include("test_arrow.jl") -GC.gc(); GC.gc() -for t in temps - try - rm(t) - catch - GC.gc() + GC.gc(); GC.gc() + for t in temps try rm(t) catch + GC.gc() + try + rm(t) + catch + end end end -end # issue #34 # data = DataFrame(A=Union{Missing, String}[randstring(10) for i ∈ 1:100], B=rand(100)) diff --git a/test/test_arrow.jl b/test/test_arrow.jl index f3b4663..a791cfe 100644 --- a/test/test_arrow.jl +++ b/test/test_arrow.jl @@ -13,9 +13,9 @@ randdate() = Date(rand(0:4000), rand(1:12), rand(1:27)) randtime() = Dates.Time(rand(0:23), rand(0:59), rand(0:59)) randdatetime() = randdate() + randtime() -randstrings() = String[[randstring(rand(0:20)) for i ∈ 1:(NROWS-1)]; "a"] +randstrings() = String[[randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"] function randstrings(::Missing) - Union{String,Missing}[[rand(Bool) ? missing : randstring(rand(0:20)) for i ∈ 1:(NROWS-1)]; "a"] + Union{String,Missing}[[rand(Bool) ? missing : randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"] end convstring(str::AbstractString) = String(str) @@ -23,8 +23,8 @@ convstring(::Missing) = missing @testset "ArrowTests" begin -cols = [rand(Int32,NROWS), - rand(Float64,NROWS), + cols = [rand(Int32, NROWS), + rand(Float64, NROWS), Date[randdate() for i ∈ 1:NROWS], DateTime[randdatetime() for i ∈ 1:NROWS], Dates.Time[randtime() for i ∈ 1:NROWS], @@ -34,55 +34,53 @@ cols = [rand(Int32,NROWS), CategoricalArrays.categorical(randstrings()), CategoricalArrays.categorical(randstrings(missing))] -colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings, + colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings, :missingstrings,:catstrings,:catstringsmissing] -featherwrite(arrow_tempname, cols, colnames) + featherwrite(arrow_tempname, cols, colnames) -ndf = featherread(arrow_tempname) + ndf = featherread(arrow_tempname) -@test ndf.names == colnames + @test ndf.names == colnames -@test typeof(ndf.columns[1]) == Arrow.Primitive{Int32} -@test typeof(ndf.columns[2]) == Arrow.Primitive{Float64} -@test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp} -@test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}} -@test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}} -@test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64} -@test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}} -@test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}} -@test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32}, - Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}} -@test typeof(ndf.columns[10]) == - Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset, - Arrow.Primitive{UInt8}}} + @test typeof(ndf.columns[1]) == Arrow.Primitive{Int32} + @test typeof(ndf.columns[2]) == Arrow.Primitive{Float64} + @test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp} + @test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}} + @test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}} + @test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64} + @test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}} + @test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}} + @test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}} + @test typeof(ndf.columns[10]) == + Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}} -for j ∈ 1:N_IDX_TESTS - i = rand(1:NROWS) - @test cols[1][i] == ndf.columns[1][i] - @test cols[2][i] == ndf.columns[2][i] - @test cols[3][i] == convert(Date, ndf.columns[3][i]) - @test cols[4][i] == convert(DateTime, ndf.columns[4][i]) - @test cols[5][i] == convert(Dates.Time, ndf.columns[5][i]) - @test isequal(cols[6][i], ndf.columns[6][i]) - @test cols[7][i] == ndf.columns[7][i] - @test isequal(cols[8][i], ndf.columns[8][i]) - @test cols[9][i] == String(ndf.columns[9][i]) - @test isequal(cols[10][i], convstring(ndf.columns[10][i])) -end -for j ∈ 1:N_IDX_TESTS - a, b = extrema(rand(1:NROWS, 2)) - i = a:b - @test cols[1][i] == ndf.columns[1][i] - @test cols[2][i] == ndf.columns[2][i] - @test cols[3][i] == convert.(Date, ndf.columns[3][i]) - @test cols[4][i] == convert.(DateTime, ndf.columns[4][i]) - @test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i]) - @test isequal(cols[6][i], ndf.columns[6][i]) - @test cols[7][i] == ndf.columns[7][i] - @test isequal(cols[8][i], ndf.columns[8][i]) - @test cols[9][i] == String.(ndf.columns[9][i]) - @test isequal(cols[10][i], convstring.(ndf.columns[10][i])) -end + for j ∈ 1:N_IDX_TESTS + i = rand(1:NROWS) + @test cols[1][i] == ndf.columns[1][i] + @test cols[2][i] == ndf.columns[2][i] + @test cols[3][i] == convert(Date, ndf.columns[3][i]) + @test cols[4][i] == convert(DateTime, ndf.columns[4][i]) + @test cols[5][i] == convert(Dates.Time, ndf.columns[5][i]) + @test isequal(cols[6][i], ndf.columns[6][i]) + @test cols[7][i] == ndf.columns[7][i] + @test isequal(cols[8][i], ndf.columns[8][i]) + @test cols[9][i] == String(ndf.columns[9][i]) + @test isequal(cols[10][i], convstring(ndf.columns[10][i])) + end + for j ∈ 1:N_IDX_TESTS + a, b = extrema(rand(1:NROWS, 2)) + i = a:b + @test cols[1][i] == ndf.columns[1][i] + @test cols[2][i] == ndf.columns[2][i] + @test cols[3][i] == convert.(Date, ndf.columns[3][i]) + @test cols[4][i] == convert.(DateTime, ndf.columns[4][i]) + @test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i]) + @test isequal(cols[6][i], ndf.columns[6][i]) + @test cols[7][i] == ndf.columns[7][i] + @test isequal(cols[8][i], ndf.columns[8][i]) + @test cols[9][i] == String.(ndf.columns[9][i]) + @test isequal(cols[10][i], convstring.(ndf.columns[10][i])) + end end diff --git a/test/test_readwrite.jl b/test/test_readwrite.jl index 591a8d2..c41cb90 100644 --- a/test/test_readwrite.jl +++ b/test/test_readwrite.jl @@ -4,8 +4,8 @@ files = map(x -> joinpath(testdir, x), readdir(testdir)) for f in files - res = featherread(f) - columns, headers = res.columns, res.names + res = featherread(f) + columns, headers = res.columns, res.names ncols = length(columns) nrows = length(columns[1]) @@ -15,23 +15,23 @@ featherwrite(temp, columns, headers, description=res.description, metadata=res.metadata) - res2 = featherread(temp) - columns2, headers2 = res2.columns, res2.names + res2 = featherread(temp) + columns2, headers2 = res2.columns, res2.names @test length(columns2) == ncols - @test headers==headers2 + @test headers == headers2 - for (c1,c2) in zip(columns, columns2) - @test length(c1)==nrows - @test length(c2)==nrows + for (c1, c2) in zip(columns, columns2) + @test length(c1) == nrows + @test length(c2) == nrows for i = 1:nrows @test isequal(c1[i], c2[i]) end end - @test res.description == res2.description - @test res.metadata == res2.metadata + @test res.description == res2.description + @test res.metadata == res2.metadata # for (col1,col2) in zip(source.ctable.columns,sink.ctable.columns) # @test col1.name == col2.name # @test col1.metadata_type == col2.metadata_type