diff --git a/Project.toml b/Project.toml index 46018b2..c6a77e1 100644 --- a/Project.toml +++ b/Project.toml @@ -22,7 +22,7 @@ DataValues = "^0.4.5" FileIO = "1" IterableTables = "0.9, 0.10, 0.11, 1" IteratorInterfaceExtensions = "^0.1.1, 1" -Parquet = "0.2, 0.3, 0.4" +Parquet = "0.5" TableShowUtils = "0.2" TableTraits = "0.4, 1" julia = "1" diff --git a/src/ParquetFiles.jl b/src/ParquetFiles.jl index 2378219..ac0229a 100644 --- a/src/ParquetFiles.jl +++ b/src/ParquetFiles.jl @@ -23,33 +23,6 @@ function Base.show(io::IO, ::MIME"application/vnd.dataresource+json", source::Pa end Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ParquetFile) = true -struct ParquetNamedTupleIterator{T,T_row} - rc::RecCursor - nrows::Int -end - -function Base.eltype(itr::ParquetNamedTupleIterator{T,T_row}) where {T,T_row} - return T -end - -function Base.length(itr::ParquetNamedTupleIterator) - return itr.nrows -end - -@generated function Base.iterate(itr::ParquetNamedTupleIterator{T,T_row}, state...) where {T,T_row} - names = fieldnames(T) - quote - y = iterate(itr.rc, state...) - if y === nothing - return nothing - else - v = y[1] - next_state = y[2] - return T(($([fieldtype(T, i) <: String ? :(String(copy(v.$(names[i])))) : :(v.$(names[i])) for i = 1:length(names)]...),)), next_state - end - end -end - function fileio_load(f::FileIO.File{FileIO.format"Parquet"}) return ParquetFile(f.filename) end @@ -58,24 +31,8 @@ IteratorInterfaceExtensions.isiterable(x::ParquetFile) = true TableTraits.isiterabletable(x::ParquetFile) = true function IteratorInterfaceExtensions.getiterator(file::ParquetFile) - p = ParFile(file.filename) - - T_row_name = Symbol("RCType$(String(gensym())[3:end])") - - schema(JuliaConverter(ParquetFiles), p, T_row_name) - - T_row = eval(T_row_name) - - col_names = fieldnames(T_row) - col_types = [i <: Vector{UInt8} ? String : i for i in T_row.types] - - T = NamedTuple{(col_names...,),Tuple{col_types...}} - - rc = RecCursor(p, 1:nrows(p), colnames(p), JuliaBuilder(p, T_row)) - - it = ParquetNamedTupleIterator{T,T_row}(rc, nrows(p)) - - return it + p = ParFile(file.filename; map_logical_types=true) + return RecordCursor(p) end end # module diff --git a/test/runtests.jl b/test/runtests.jl index 669a92d..3e47e56 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,7 +25,7 @@ using Test @test ar[1] == (n_nationkey = 0, n_name = "ALGERIA", n_regionkey = 0, n_comment = " haggle. carefully final deposits detect slyly agai") @test sprint((stream, data)->show(stream, "text/html", data), pqf)[1:100] == "show(stream, "application/vnd.dataresource+json", data), pqf)[1:100] == "{\"schema\":{\"fields\":[{\"name\":\"n_nationkey\",\"type\":\"integer\"},{\"name\":\"n_name\",\"type\":\"string\"},{\"nam" + @test sprint((stream, data)->show(stream, "application/vnd.dataresource+json", data), pqf)[1:100] == "{\"schema\":{\"fields\":[{\"name\":\"n_nationkey\",\"type\":\"string\"},{\"name\":\"n_name\",\"type\":\"string\"},{\"name" @test sprint(show, pqf)[1:100] == "25x4 Parquet file\nn_nationkey │ n_name │ n_regionkey\n────────────┼─" @test showable("text/html", pqf) == true @test showable("application/vnd.dataresource+json", pqf) == true
n_nationkeyn_namen_regionkeyn_comment