diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 878f51a2b9eac..218002ebb3f6a 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -21,17 +21,13 @@
     AbstractMethodError,
     Pandas4Warning,
 )
-from pandas.util._decorators import (
-    doc,
-    set_module,
-)
+from pandas.util._decorators import set_module
 from pandas.util._validators import check_dtype_backend
 
 from pandas import (
     DataFrame,
     get_option,
 )
-from pandas.core.shared_docs import _shared_docs
 
 from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
@@ -410,7 +406,6 @@ def read(
                 handles.close()
 
 
-@doc(storage_options=_shared_docs["storage_options"])
 def to_parquet(
     df: DataFrame,
     path: FilePath | WriteBuffer[bytes] | None = None,
@@ -430,10 +425,10 @@ def to_parquet(
     df : DataFrame
     path : str, path object, file-like object, or None, default None
         String, path object (implementing ``os.PathLike[str]``), or file-like
-        object implementing a binary ``write()`` function. If None, the result is
-        returned as bytes. If a string, it will be used as Root Directory path
-        when writing a partitioned dataset. The engine fastparquet does not
-        accept file-like objects.
+        object implementing a binary ``write()`` function. If None, the result
+        is returned as bytes. If a string, it will be used as Root Directory
+        path when writing a partitioned dataset. The engine fastparquet does
+        not accept file-like objects.
     engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
         Parquet library to use. If 'auto', then the option
         ``io.parquet.engine`` is used. The default ``io.parquet.engine``
@@ -460,8 +455,15 @@ def to_parquet(
         Column names by which to partition the dataset.
         Columns are partitioned in the order they are given.
         Must be None if path is not a string.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.
     filesystem : fsspec or pyarrow filesystem, default None
         Filesystem object to use when reading the parquet file. Only implemented
         for ``engine="pyarrow"``.
@@ -504,7 +506,6 @@ def to_parquet(
 
 
 @set_module("pandas")
-@doc(storage_options=_shared_docs["storage_options"])
 def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
@@ -547,8 +548,15 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.
     dtype_backend : {{'numpy_nullable', 'pyarrow'}}
         Back-end data type applied to the resultant :class:`DataFrame`
         (still experimental). If not specified, the default behavior
@@ -609,7 +617,7 @@ def read_parquet(
 
     Examples
     --------
-    >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
+    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
     >>> original_df
        foo  bar
     0    0    5
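
Below is a minimal usage sketch of the behavior the updated docstrings describe (``to_parquet`` returns the Parquet payload as bytes when ``path`` is None, and ``read_parquet`` accepts a file-like object), assuming pyarrow is installed. The S3 path in the trailing comment is a hypothetical illustration of ``storage_options``, not part of this diff.

```python
import io

import pandas as pd

original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})

# path=None: the serialized Parquet data is returned as bytes instead of
# being written to a file.
payload = original_df.to_parquet(path=None, engine="pyarrow")

# Round-trip the bytes back into a DataFrame via a binary buffer.
restored_df = pd.read_parquet(io.BytesIO(payload), engine="pyarrow")
assert restored_df.equals(original_df)

# For remote URLs, the key-value pairs in storage_options are forwarded to
# fsspec.open, e.g. (hypothetical bucket):
# original_df.to_parquet("s3://my-bucket/data.parquet", storage_options={"anon": True})
```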