Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ DataValues = "^0.4.5"
FileIO = "1"
IterableTables = "0.9, 0.10, 0.11, 1"
IteratorInterfaceExtensions = "^0.1.1, 1"
Parquet = "0.2, 0.3, 0.4"
Parquet = "0.5"
TableShowUtils = "0.2"
TableTraits = "0.4, 1"
julia = "1"
47 changes: 2 additions & 45 deletions src/ParquetFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,6 @@ function Base.show(io::IO, ::MIME"application/vnd.dataresource+json", source::Pa
end
# A `ParquetFile` is unconditionally reported as showable for the
# `application/vnd.dataresource+json` MIME type.
function Base.Multimedia.showable(
    ::MIME"application/vnd.dataresource+json", ::ParquetFile
)
    return true
end

# Row iterator that wraps a record cursor together with a row count.
# `T` is the element type reported by `eltype` for this iterator; `T_row` is a second
# type parameter that none of the fields below use directly.
struct ParquetNamedTupleIterator{T,T_row}
# Underlying record cursor; `iterate` on this struct forwards to it.
rc::RecCursor
# Row count, returned as-is by `length`.
nrows::Int
end

"""
    Base.eltype(::Type{ParquetNamedTupleIterator{T,T_row}})

Return `T`, the type of the rows produced by the iterator.

The method is defined on the iterator *type*, which is what the iteration interface
expects, so that `eltype(typeof(itr))` is answered correctly instead of falling back
to `Any`. Calling `eltype(itr)` on an instance keeps working through Base's generic
`eltype(x) = eltype(typeof(x))` fallback.
"""
function Base.eltype(::Type{ParquetNamedTupleIterator{T,T_row}}) where {T,T_row}
    return T
end

# The iterator's length is the row count stored in its `nrows` field.
Base.length(itr::ParquetNamedTupleIterator) = itr.nrows

# Iteration step for `ParquetNamedTupleIterator`. `state...` accepts both the initial
# call (no state) and later calls (one state value) and is forwarded unchanged to the
# wrapped cursor. Because the function is `@generated`, the code outside `quote` runs
# on the argument types and the quoted expression becomes the method body for each
# concrete `T`.
@generated function Base.iterate(itr::ParquetNamedTupleIterator{T,T_row}, state...) where {T,T_row}
# Field names of the output row type; available here because `T` is a type parameter.
names = fieldnames(T)
quote
# Advance the wrapped record cursor.
y = iterate(itr.rc, state...)
if y === nothing
# The cursor is exhausted, so this iterator is finished as well.
return nothing
else
v = y[1]
next_state = y[2]
# One expression is spliced in per field of `T`: fields whose type is `String` become
# `String(copy(v.<name>))`, all other fields are read directly as `v.<name>`. The
# resulting tuple is passed to `T` to build the row, returned with the cursor's state.
# NOTE(review): the `copy` presumably keeps `String(...)` from taking over the
# cursor's own byte vector — confirm against Base `String(::Vector{UInt8})` semantics.
return T(($([fieldtype(T, i) <: String ? :(String(copy(v.$(names[i])))) : :(v.$(names[i])) for i = 1:length(names)]...),)), next_state
end
end
end

# Build a `ParquetFile` from the filename carried by a FileIO `File` tagged with the
# Parquet format.
fileio_load(f::FileIO.File{FileIO.format"Parquet"}) = ParquetFile(f.filename)
Expand All @@ -58,24 +31,8 @@ IteratorInterfaceExtensions.isiterable(x::ParquetFile) = true
# Every `ParquetFile` reports itself as an iterable table.
function TableTraits.isiterabletable(::ParquetFile)
    return true
end

# Build the row iterator for a `ParquetFile`.
# NOTE(review): this text was rendered from a diff view without +/- markers. The
# statements from the first `ParFile(...)` call through `return it` look like the
# implementation being removed, and the final two statements look like its
# replacement. Read literally, everything after `return it` is unreachable — confirm
# against the repository before relying on either half.
function IteratorInterfaceExtensions.getiterator(file::ParquetFile)
p = ParFile(file.filename)

# Name for the row type: "RCType" followed by a `gensym` name from its third character on.
T_row_name = Symbol("RCType$(String(gensym())[3:end])")

# NOTE(review): presumably defines a type called `T_row_name` inside the `ParquetFiles`
# module from the file's schema — confirm against the Parquet.jl version in use.
schema(JuliaConverter(ParquetFiles), p, T_row_name)

# Resolve the generated name to the type object at runtime.
T_row = eval(T_row_name)

col_names = fieldnames(T_row)
# Field types that are `Vector{UInt8}` are mapped to `String`; all others are kept as-is.
col_types = [i <: Vector{UInt8} ? String : i for i in T_row.types]

# Named-tuple type describing one output row.
T = NamedTuple{(col_names...,),Tuple{col_types...}}

# The cursor is given the range `1:nrows(p)`, `colnames(p)` and a builder for `T_row`.
rc = RecCursor(p, 1:nrows(p), colnames(p), JuliaBuilder(p, T_row))

it = ParquetNamedTupleIterator{T,T_row}(rc, nrows(p))

return it
# Second variant (see the review note at the top): open the file with
# `map_logical_types=true` and return a `RecordCursor` over it directly.
p = ParFile(file.filename; map_logical_types=true)
return RecordCursor(p)
end

end # module
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ using Test
@test ar[1] == (n_nationkey = 0, n_name = "ALGERIA", n_regionkey = 0, n_comment = " haggle. carefully final deposits detect slyly agai")

@test sprint((stream, data)->show(stream, "text/html", data), pqf)[1:100] == "<table><thead><tr><th>n_nationkey</th><th>n_name</th><th>n_regionkey</th><th>n_comment</th></tr></th"
@test sprint((stream, data)->show(stream, "application/vnd.dataresource+json", data), pqf)[1:100] == "{\"schema\":{\"fields\":[{\"name\":\"n_nationkey\",\"type\":\"integer\"},{\"name\":\"n_name\",\"type\":\"string\"},{\"nam"
@test sprint((stream, data)->show(stream, "application/vnd.dataresource+json", data), pqf)[1:100] == "{\"schema\":{\"fields\":[{\"name\":\"n_nationkey\",\"type\":\"string\"},{\"name\":\"n_name\",\"type\":\"string\"},{\"name"
@test sprint(show, pqf)[1:100] == "25x4 Parquet file\nn_nationkey │ n_name │ n_regionkey\n────────────┼─"
@test showable("text/html", pqf) == true
@test showable("application/vnd.dataresource+json", pqf) == true
Expand Down