40 changes: 24 additions & 16 deletions pandas/io/parquet.py
@@ -21,17 +21,13 @@
     AbstractMethodError,
     Pandas4Warning,
 )
-from pandas.util._decorators import (
-    doc,
-    set_module,
-)
+from pandas.util._decorators import set_module
 from pandas.util._validators import check_dtype_backend
 
 from pandas import (
     DataFrame,
     get_option,
 )
-from pandas.core.shared_docs import _shared_docs
 
 from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
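For context on this import change: the ``doc`` decorator being dropped fills ``{placeholders}`` in a function's docstring from shared fragments via ``str.format`` at import time. A minimal sketch of that pattern follows (hedged: pandas' real implementation in ``pandas.util._decorators`` is more general and can compose several docstring parts):

    # Minimal sketch of the templating pattern this PR removes; pandas'
    # actual ``doc`` decorator is more general than this.
    _shared_docs = {
        "storage_options": (
            "storage_options : dict, optional\n"
            "    Extra options for a particular storage connection ..."
        )
    }

    def doc(**params):
        def decorator(func):
            # str.format fills {storage_options}; literal braces in the
            # docstring must therefore be doubled, e.g. {{'auto', 'pyarrow'}}.
            func.__doc__ = func.__doc__.format(**params)
            return func
        return decorator

    @doc(storage_options=_shared_docs["storage_options"])
    def to_parquet(df, path=None):
        """Write a DataFrame to the parquet format.

        {storage_options}
        """

Dropping the decorator in favor of static docstrings drives the rest of this diff: the shared fragment gets inlined verbatim, and doubled literal braces (needed only to survive ``str.format``) can collapse to single braces, as in the doctest hunk at the end.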
@@ -410,7 +406,6 @@ def read(
                 handles.close()
 
 
-@doc(storage_options=_shared_docs["storage_options"])
 def to_parquet(
     df: DataFrame,
     path: FilePath | WriteBuffer[bytes] | None = None,
@@ -430,10 +425,10 @@ def to_parquet(
     df : DataFrame
     path : str, path object, file-like object, or None, default None
         String, path object (implementing ``os.PathLike[str]``), or file-like
-        object implementing a binary ``write()`` function. If None, the result is
-        returned as bytes. If a string, it will be used as Root Directory path
-        when writing a partitioned dataset. The engine fastparquet does not
-        accept file-like objects.
+        object implementing a binary ``write()`` function. If None, the result
+        is returned as bytes. If a string, it will be used as Root Directory
+        path when writing a partitioned dataset. The engine fastparquet does
+        not accept file-like objects.
     engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
         Parquet library to use. If 'auto', then the option
         ``io.parquet.engine`` is used. The default ``io.parquet.engine``
@@ -460,8 +455,15 @@
         Column names by which to partition the dataset.
         Columns are partitioned in the order they are given.
         Must be None if path is not a string.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
     filesystem : fsspec or pyarrow filesystem, default None
         Filesystem object to use when reading the parquet file. Only implemented
         for ``engine="pyarrow"``.
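The inlined ``storage_options`` text above describes the routing; as a usage sketch (the bucket name and credential values below are placeholders, and the remote write assumes ``s3fs`` is installed so fsspec can handle the ``s3://`` scheme):

    import pandas as pd

    df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})

    # Local write; "pyarrow" is used when available under engine="auto".
    df.to_parquet("data.parquet", engine="pyarrow")

    # For an s3:// path, storage_options is forwarded to fsspec.open, so
    # the accepted keys are whatever the s3fs backend takes ("key"/"secret").
    df.to_parquet(
        "s3://my-bucket/data.parquet",  # placeholder bucket
        storage_options={"key": "<ACCESS_KEY>", "secret": "<SECRET_KEY>"},
    )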
@@ -504,7 +506,6 @@
 
 
 @set_module("pandas")
-@doc(storage_options=_shared_docs["storage_options"])
 def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
@@ -547,8 +548,15 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
     dtype_backend : {{'numpy_nullable', 'pyarrow'}}
         Back-end data type applied to the resultant :class:`DataFrame`
         (still experimental). If not specified, the default behavior
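Same inlining on the read side; a short usage sketch assuming the file written above and an installed pyarrow:

    import pandas as pd

    # Read only the "foo" column; dtype_backend="pyarrow" yields
    # ArrowDtype-backed columns instead of the NumPy default.
    df = pd.read_parquet("data.parquet", columns=["foo"], dtype_backend="pyarrow")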
@@ -609,7 +617,7 @@ def read_parquet(
 
     Examples
     --------
-    >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
+    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
     >>> original_df
        foo  bar
     0    0    5
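Tying the two docstrings together: per the ``path`` description earlier in this diff, ``to_parquet(path=None)`` returns the file as bytes, which ``read_parquet`` accepts through a binary buffer. A round-trip sketch (assuming pyarrow is installed):

    import io

    import pandas as pd

    original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})

    # path=None returns the serialized parquet payload as bytes.
    payload = original_df.to_parquet(path=None)

    # Read it back through an in-memory binary buffer.
    restored = pd.read_parquet(io.BytesIO(payload))
    assert restored.equals(original_df)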