Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
using Documenter, FeatherLib

# Build the documentation for the FeatherLib module. Each keyword is passed
# exactly once (repeating a keyword argument in a call is an error in Julia).
makedocs(
    modules=[FeatherLib],
    sitename="FeatherLib.jl",
    analytics="UA-132838790-1",
    pages=[
        "Introduction" => "index.md"
    ]
)

# Publish the generated documentation to the package repository.
deploydocs(
    repo="github.com/queryverse/FeatherLib.jl.git"
)
8 changes: 4 additions & 4 deletions src/loadfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ function validatefile(filename::AbstractString, data::AbstractVector{UInt8})
throw(ArgumentError("'$file' is not in feather format: total length of file: $(length(data))"))
end
header = data[1:4]
footer = data[(end-3):end]
footer = data[(end - 3):end]
if header ≠ FEATHER_MAGIC_BYTES || footer ≠ FEATHER_MAGIC_BYTES
throw(ArgumentError(string("'$filename' is not in feather format: header = $header, ",
"footer = $footer.")))
Expand All @@ -21,15 +21,15 @@ function loadfile(filename::AbstractString; use_mmap::Bool=true)
end

"""
    metalength(data::AbstractVector{UInt8})

Return the `Int32` stored in the four bytes that sit immediately before the last four
bytes of `data` (positions `length(data) - 7` through `length(data) - 4`).

Presumably this is the byte length of the metadata block that precedes the trailing
magic bytes — confirm against the Feather file layout.

Throws a `BoundsError` when `data` has fewer than 8 elements.
"""
function metalength(data::AbstractVector{UInt8})
    n = length(data)
    # Copy only the 4-byte field and decode it once, in host byte order.
    return read(IOBuffer(data[(n - 7):(n - 4)]), Int32)
end

"""
    metaposition(data::AbstractVector{UInt8}, metalen::Integer=metalength(data))

Return `length(data) - (metalen + 7)`.

`metalen` defaults to `metalength(data)`. Presumably the result is the 1-based index
at which the metadata block starts — confirm against the Feather file layout.
"""
function metaposition(data::AbstractVector{UInt8}, metalen::Integer=metalength(data))
    return length(data) - (metalen + 7)
end

"""
    rootposition(data::AbstractVector{UInt8}, mpos::Integer=metaposition(data))

Return the `Int32` stored in the four bytes of `data` starting at index `mpos`.

`mpos` defaults to `metaposition(data)`. Throws a `BoundsError` when those four bytes
are not all inside `data`.
"""
function rootposition(data::AbstractVector{UInt8}, mpos::Integer=metaposition(data))
    # Slice exactly sizeof(Int32) bytes so the field may be the last thing in `data`.
    stop = mpos + sizeof(Int32) - 1
    return read(IOBuffer(data[mpos:stop]), Int32)
end

function getctable(data::AbstractVector{UInt8})
Expand Down
8 changes: 4 additions & 4 deletions src/metadata.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ mutable struct CategoryMetadata
ordered::Bool
end

# Register `false` as the default value of `CategoryMetadata.ordered`.
# NOTE(review): `@DEFAULT` is presumably the FlatBuffers.jl macro — confirm.
@DEFAULT CategoryMetadata ordered = false

mutable struct TimestampMetadata
unit::TimeUnit
Expand All @@ -43,7 +43,7 @@ mutable struct TimeMetadata
unit::TimeUnit
end

# Declare `TypeMetadata` as the union of the metadata kinds listed below.
# NOTE(review): `@UNION` is presumably the FlatBuffers.jl macro — confirm.
@UNION TypeMetadata (Nothing, CategoryMetadata, TimestampMetadata, DateMetadata, TimeMetadata)

mutable struct Column
name::String
Expand Down Expand Up @@ -119,7 +119,7 @@ const JULIA_TIME_DICT = Dict{Metadata.TimeUnit,DataType}(
Metadata.MICROSECOND => Dates.Microsecond,
Metadata.NANOSECOND => Dates.Nanosecond
)
# Inverse of `JULIA_TIME_DICT`: maps each Julia period type to its `Metadata.TimeUnit`.
const METADATA_TIME_DICT = Dict{DataType,Metadata.TimeUnit}(
    v => k for (k, v) in JULIA_TIME_DICT
)


"""
    isprimitivetype(t::Metadata.DType)

Return `true` when `t` is not a member of `NON_PRIMITIVE_TYPES`.
"""
function isprimitivetype(t::Metadata.DType)
    return !(t in NON_PRIMITIVE_TYPES)
end
Expand Down Expand Up @@ -171,4 +171,4 @@ function getmetadata(io::IO, ::Type{T}, A::DictEncoding) where T
Metadata.CategoryMetadata(vals, true)
end

"""
    getmetadata(io::IO, ::Type{Union{Missing,T}}, A::DictEncoding) where T

Method for element types that admit `missing`: drops `Missing` from the element type
and forwards to `getmetadata(io, T, A)`.
"""
function getmetadata(io::IO, ::Type{Union{Missing,T}}, A::DictEncoding) where T
    return getmetadata(io, T, A)
end
8 changes: 4 additions & 4 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ function featherread(filename::AbstractString; use_mmap=true)
return ResultSet(columns, colnames, ctable.description, ctable.metadata)
end

#= ====================================================================================================
    new column construction stuff
==================================================================================================== =#
"""
    Base.length(p::Metadata.PrimitiveArray)

Return the `length` field of `p`.
"""
function Base.length(p::Metadata.PrimitiveArray)
    return p.length
end

"""
    startloc(p::Metadata.PrimitiveArray)

Return `p.offset + 1`.

Presumably this converts a 0-based byte offset into a 1-based Julia index — confirm.
"""
startloc(p::Metadata.PrimitiveArray) = p.offset + 1

"""
    Arrow.nullcount(p::Metadata.PrimitiveArray)

Return the `null_count` field of `p`.
"""
function Arrow.nullcount(p::Metadata.PrimitiveArray)
    return p.null_count
end

Expand All @@ -29,7 +29,7 @@ function bitmasklength(p::Metadata.PrimitiveArray)
end

"""
    offsetslength(p::Metadata.PrimitiveArray)

Return `0` when `p.dtype` is a primitive type. Otherwise return `padding` applied to
the byte size of `length(p) + 1` values of type `Int32`.
"""
function offsetslength(p::Metadata.PrimitiveArray)
    isprimitivetype(p.dtype) && return 0
    # One more Int32 than there are elements, then rounded by `padding`.
    return padding((length(p) + 1) * sizeof(Int32))
end

"""
    valueslength(p::Metadata.PrimitiveArray)

Return `p.total_bytes` minus `offsetslength(p)` and `bitmasklength(p)`.
"""
function valueslength(p::Metadata.PrimitiveArray)
    total = p.total_bytes
    offs = offsetslength(p)
    mask = bitmasklength(p)
    return total - offs - mask
end
Expand Down
4 changes: 2 additions & 2 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function writecontents(::Type{Metadata.PrimitiveArray}, io::IO, A::ArrowVector)
a = position(io)
writecontents(io, A)
b = position(io)
Metadata.PrimitiveArray(A, a, b-a)
Metadata.PrimitiveArray(A, a, b - a)
end


Expand All @@ -55,7 +55,7 @@ end

"""
    writemetadata(io::IO, ctable::Metadata.CTable)

Build `ctable` with `FlatBuffers.build!`, pass the bytes after `meta.head` to
`writepadded(io, ...)`, and return the number of bytes in that range as an `Int32`.

The returned count is the length of the byte range itself; whether `writepadded`
emits extra padding bytes is not visible here.
"""
function writemetadata(io::IO, ctable::Metadata.CTable)
    meta = FlatBuffers.build!(ctable)
    # Everything after `meta.head` is written; a view avoids copying the bytes.
    rng = (meta.head + 1):length(meta.bytes)
    writepadded(io, view(meta.bytes, rng))
    return Int32(length(rng))
end
18 changes: 9 additions & 9 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ temps = []

@testset "FeatherLib" begin

    include("test_readwrite.jl")
    include("test_arrow.jl")

    # Best-effort cleanup of the files collected in `temps`.
    # NOTE(review): the GC passes presumably release memory-mapped handles so the
    # files can be deleted — confirm.
    GC.gc(); GC.gc()
    for t in temps
        try
            rm(t)
        catch
            # First attempt failed: collect once more and retry.
            GC.gc()
            try
                rm(t)
            catch
                # Deliberately ignored: a leftover temp file must not fail the suite.
            end
        end
    end
end

# issue #34
# data = DataFrame(A=Union{Missing, String}[randstring(10) for i ∈ 1:100], B=rand(100))
Expand Down
94 changes: 46 additions & 48 deletions test/test_arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ randdate() = Date(rand(0:4000), rand(1:12), rand(1:27))
"""
    randtime()

Return a random `Dates.Time` with hour in `0:23`, minute in `0:59`, and second in
`0:59`.
"""
function randtime()
    h = rand(0:23)
    m = rand(0:59)
    s = rand(0:59)
    return Dates.Time(h, m, s)
end
"""
    randdatetime()

Return the sum of a random date from `randdate()` and a random time from `randtime()`.
"""
function randdatetime()
    d = randdate()
    t = randtime()
    return d + t
end

"""
    randstrings()

Return a `Vector{String}` of `NROWS` elements: `NROWS - 1` random strings with lengths
drawn from `0:20`, followed by the fixed string `"a"`.
"""
randstrings() = String[[randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"]
"""
    randstrings(::Missing)

Return a `Vector{Union{String,Missing}}` of `NROWS` elements. Each of the first
`NROWS - 1` entries is `missing` or a random string with length drawn from `0:20`,
chosen by `rand(Bool)`; the final entry is always `"a"`.
"""
function randstrings(::Missing)
    return Union{String,Missing}[
        [rand(Bool) ? missing : randstring(rand(0:20)) for i ∈ 1:(NROWS - 1)]; "a"
    ]
end

"""
    convstring(x)

Convert an `AbstractString` to a `String`; pass `missing` through unchanged.
"""
function convstring(str::AbstractString)
    return String(str)
end

function convstring(::Missing)
    return missing
end

@testset "ArrowTests" begin

cols = [rand(Int32,NROWS),
rand(Float64,NROWS),
cols = [rand(Int32, NROWS),
rand(Float64, NROWS),
Date[randdate() for i ∈ 1:NROWS],
DateTime[randdatetime() for i ∈ 1:NROWS],
Dates.Time[randtime() for i ∈ 1:NROWS],
Expand All @@ -34,55 +34,53 @@ cols = [rand(Int32,NROWS),
CategoricalArrays.categorical(randstrings()),
CategoricalArrays.categorical(randstrings(missing))]

colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings,
colnames = [:ints,:floats,:dates,:datetimes,:times,:missingints,:strings,
:missingstrings,:catstrings,:catstringsmissing]

featherwrite(arrow_tempname, cols, colnames)
featherwrite(arrow_tempname, cols, colnames)

ndf = featherread(arrow_tempname)
ndf = featherread(arrow_tempname)

@test ndf.names == colnames
@test ndf.names == colnames

@test typeof(ndf.columns[1]) == Arrow.Primitive{Int32}
@test typeof(ndf.columns[2]) == Arrow.Primitive{Float64}
@test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp}
@test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}}
@test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}}
@test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64}
@test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32},
Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[10]) ==
Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset,
Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[1]) == Arrow.Primitive{Int32}
@test typeof(ndf.columns[2]) == Arrow.Primitive{Float64}
@test typeof(ndf.columns[3]) == Arrow.Primitive{Arrow.Datestamp}
@test typeof(ndf.columns[4]) == Arrow.Primitive{Arrow.Timestamp{Dates.Millisecond}}
@test typeof(ndf.columns[5]) == Arrow.Primitive{Arrow.TimeOfDay{Dates.Nanosecond,Int64}}
@test typeof(ndf.columns[6]) == Arrow.NullablePrimitive{Int64}
@test typeof(ndf.columns[7]) == Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[8]) == Arrow.NullableList{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}
@test typeof(ndf.columns[9]) == Arrow.DictEncoding{String,Arrow.Primitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}
@test typeof(ndf.columns[10]) ==
Arrow.DictEncoding{Union{String,Missing},Arrow.NullablePrimitive{Int32},Arrow.List{String,Arrow.DefaultOffset,Arrow.Primitive{UInt8}}}

for j ∈ 1:N_IDX_TESTS
i = rand(1:NROWS)
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert(Date, ndf.columns[3][i])
@test cols[4][i] == convert(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String(ndf.columns[9][i])
@test isequal(cols[10][i], convstring(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
a, b = extrema(rand(1:NROWS, 2))
i = a:b
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert.(Date, ndf.columns[3][i])
@test cols[4][i] == convert.(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String.(ndf.columns[9][i])
@test isequal(cols[10][i], convstring.(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
i = rand(1:NROWS)
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert(Date, ndf.columns[3][i])
@test cols[4][i] == convert(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String(ndf.columns[9][i])
@test isequal(cols[10][i], convstring(ndf.columns[10][i]))
end
for j ∈ 1:N_IDX_TESTS
a, b = extrema(rand(1:NROWS, 2))
i = a:b
@test cols[1][i] == ndf.columns[1][i]
@test cols[2][i] == ndf.columns[2][i]
@test cols[3][i] == convert.(Date, ndf.columns[3][i])
@test cols[4][i] == convert.(DateTime, ndf.columns[4][i])
@test cols[5][i] == convert.(Dates.Time, ndf.columns[5][i])
@test isequal(cols[6][i], ndf.columns[6][i])
@test cols[7][i] == ndf.columns[7][i]
@test isequal(cols[8][i], ndf.columns[8][i])
@test cols[9][i] == String.(ndf.columns[9][i])
@test isequal(cols[10][i], convstring.(ndf.columns[10][i]))
end

end
20 changes: 10 additions & 10 deletions test/test_readwrite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
files = map(x -> joinpath(testdir, x), readdir(testdir))

for f in files
res = featherread(f)
columns, headers = res.columns, res.names
res = featherread(f)
columns, headers = res.columns, res.names

ncols = length(columns)
nrows = length(columns[1])
Expand All @@ -15,23 +15,23 @@

featherwrite(temp, columns, headers, description=res.description, metadata=res.metadata)

res2 = featherread(temp)
columns2, headers2 = res2.columns, res2.names
res2 = featherread(temp)
columns2, headers2 = res2.columns, res2.names

@test length(columns2) == ncols

@test headers==headers2
@test headers == headers2

for (c1,c2) in zip(columns, columns2)
@test length(c1)==nrows
@test length(c2)==nrows
for (c1, c2) in zip(columns, columns2)
@test length(c1) == nrows
@test length(c2) == nrows
for i = 1:nrows
@test isequal(c1[i], c2[i])
end
end

@test res.description == res2.description
@test res.metadata == res2.metadata
@test res.description == res2.description
@test res.metadata == res2.metadata
# for (col1,col2) in zip(source.ctable.columns,sink.ctable.columns)
# @test col1.name == col2.name
# @test col1.metadata_type == col2.metadata_type
Expand Down