From 977f9d3156da7b285f8881d6c854ddf579920ddd Mon Sep 17 00:00:00 2001
From: sdhjebngc <2224531525@qq.com>
Date: Fri, 19 Dec 2025 13:50:01 +0800
Subject: [PATCH 1/5] DOC: Replace @doc decorator with inline docstrings in pandas/io/pickle.py

---
 pandas/io/pickle.py | 78 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 22 deletions(-)

diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index df1c35e0dabee..e013fd96088d8 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -10,12 +10,7 @@
 import warnings

 from pandas.compat import pickle_compat
-from pandas.util._decorators import (
-    doc,
-    set_module,
-)
-
-from pandas.core.shared_docs import _shared_docs
+from pandas.util._decorators import set_module

 from pandas.io.common import get_handle

@@ -35,10 +30,6 @@


 @set_module("pandas")
-@doc(
-    storage_options=_shared_docs["storage_options"],
-    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
-)
 def to_pickle(
     obj: Any,
     filepath_or_buffer: FilePath | WriteBuffer[bytes],
@@ -57,8 +48,24 @@ def to_pickle(
         String, path object (implementing ``os.PathLike[str]``), or file-like
         object implementing a binary ``write()`` function.
         Also accepts URL. URL has to be of S3 or GCS.
-    {compression_options}
-
+    compression : str or dict, default 'infer'
+        For on-the-fly compression of the output data. If 'infer' and 'filepath_or_buffer' is
+        path-like, then detect compression from the following extensions: '.gz',
+        '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+        (otherwise no compression).
+        Set to ``None`` for no compression.
+        Can also be a dict with key ``'method'`` set
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+        other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for faster compression and to create
+        a reproducible gzip archive:
+        ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+
+        .. versionadded:: 1.5.0
+            Added support for `.tar` files.
     protocol : int
         Int which indicates which protocol should be used by the pickler,
         default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
@@ -67,8 +74,15 @@ def to_pickle(
         For Python >= 3.4, 4 is a valid value. A negative value for the
         protocol parameter is equivalent to setting its value to
         HIGHEST_PROTOCOL.
-
-    {storage_options}
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.

     .. [1] https://docs.python.org/3/library/pickle.html

@@ -117,10 +131,6 @@ def to_pickle(


 @set_module("pandas")
-@doc(
-    storage_options=_shared_docs["storage_options"],
-    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
-)
 def read_pickle(
     filepath_or_buffer: FilePath | ReadPickleBuffer,
     compression: CompressionOptions = "infer",
@@ -140,10 +150,34 @@ def read_pickle(
         String, path object (implementing ``os.PathLike[str]``), or file-like
         object implementing a binary ``readlines()`` function.
         Also accepts URL. URL is not limited to S3 and GCS.
-
-    {decompression_options}
-
-    {storage_options}
+    compression : str or dict, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
+        path-like, then detect compression from the following extensions: '.gz',
+        '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+        (otherwise no compression).
+        If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in.
+        Set to ``None`` for no decompression.
+        Can also be a dict with key ``'method'`` set
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+        other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for Zstandard decompression using a
+        custom compression dictionary:
+        ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
+
+        .. versionadded:: 1.5.0
+            Added support for `.tar` files.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.

     Returns
     -------
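The ``compression`` text inlined above describes behaviour that can be exercised end to end. A minimal round-trip sketch, assuming only pandas and the standard library (the file name is illustrative):

    import os
    import tempfile

    import pandas as pd

    df = pd.DataFrame({"a": range(3)})

    with tempfile.TemporaryDirectory() as tmp:
        # The '.gz' suffix would also be picked up by compression='infer'.
        path = os.path.join(tmp, "frame.pkl.gz")
        # Dict form: extra keys are forwarded to gzip.GzipFile;
        # mtime=1 makes the archive byte-for-byte reproducible.
        df.to_pickle(
            path,
            compression={"method": "gzip", "compresslevel": 1, "mtime": 1},
        )
        restored = pd.read_pickle(path)  # compression inferred from '.gz'
        assert restored.equals(df)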
From 43b2f81bdc086d6f778dfd1b2d32c6b35c8e2f46 Mon Sep 17 00:00:00 2001
From: sdhjebngc <2224531525@qq.com>
Date: Fri, 19 Dec 2025 13:55:57 +0800
Subject: [PATCH 2/5] DOC: Fix line length issues in pandas/io/pickle.py

---
 pandas/io/pickle.py | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index e013fd96088d8..844987d65f94f 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -49,19 +49,19 @@ def to_pickle(
         object implementing a binary ``write()`` function.
         Also accepts URL. URL has to be of S3 or GCS.
     compression : str or dict, default 'infer'
-        For on-the-fly compression of the output data. If 'infer' and 'filepath_or_buffer' is
-        path-like, then detect compression from the following extensions: '.gz',
-        '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
-        (otherwise no compression).
+        For on-the-fly compression of the output data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
         Set to ``None`` for no compression.
         Can also be a dict with key ``'method'`` set
-        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
-        other key-value pairs are forwarded to
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+        ``'tar'``} and other key-value pairs are forwarded to
         ``zipfile.ZipFile``, ``gzip.GzipFile``,
         ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
         ``tarfile.TarFile``, respectively.
-        As an example, the following could be passed for faster compression and to create
-        a reproducible gzip archive:
+        As an example, the following could be passed for faster compression
+        and to create a reproducible gzip archive:
         ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.

         .. versionadded:: 1.5.0
@@ -151,20 +151,21 @@ def read_pickle(
         object implementing a binary ``readlines()`` function.
         Also accepts URL. URL is not limited to S3 and GCS.
     compression : str or dict, default 'infer'
-        For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
-        path-like, then detect compression from the following extensions: '.gz',
-        '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
-        (otherwise no compression).
-        If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in.
+        For on-the-fly decompression of on-disk data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
+        If using 'zip' or 'tar', the ZIP file must contain only one data file
+        to be read in.
         Set to ``None`` for no decompression.
         Can also be a dict with key ``'method'`` set
-        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
-        other key-value pairs are forwarded to
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+        ``'tar'``} and other key-value pairs are forwarded to
         ``zipfile.ZipFile``, ``gzip.GzipFile``,
         ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
         ``tarfile.TarFile``, respectively.
-        As an example, the following could be passed for Zstandard decompression using a
-        custom compression dictionary:
+        As an example, the following could be passed for Zstandard decompression
+        using a custom compression dictionary:
         ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.

         .. versionadded:: 1.5.0
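With the ``@doc`` templating gone and the long lines rewrapped, the rendered docstrings should contain no leftover substitution fields and no over-long lines. A quick sanity check along these lines (the 88-character cap mirrors the pandas style convention and is an assumption here, as is the exemption for URL lines):

    from pandas.io.pickle import read_pickle, to_pickle

    for func in (read_pickle, to_pickle):
        doc = func.__doc__ or ""
        # No substitution fields should survive the decorator removal.
        for placeholder in ("{storage_options}", "{compression_options}",
                            "{decompression_options}"):
            assert placeholder not in doc
        # Docstring lines should fit the (assumed) 88-character limit;
        # bare URL lines are skipped, as they are conventionally exempt.
        too_long = [
            line for line in doc.splitlines()
            if len(line) > 88 and "https://" not in line
        ]
        assert not too_long, too_long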
From 67c9fe30c0dafda102f4215be2174ba01cdc862f Mon Sep 17 00:00:00 2001
From: sdhjebngc <2224531525@qq.com>
Date: Sat, 20 Dec 2025 11:13:49 +0800
Subject: [PATCH 3/5] DOC: Remove versionadded notes from pickle.py compression parameter

---
 pandas/io/pickle.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 844987d65f94f..0f2b380bc70bf 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -63,9 +63,6 @@ def to_pickle(
         As an example, the following could be passed for faster compression
         and to create a reproducible gzip archive:
         ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
-
-        .. versionadded:: 1.5.0
-            Added support for `.tar` files.
     protocol : int
         Int which indicates which protocol should be used by the pickler,
         default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
@@ -167,9 +164,6 @@ def read_pickle(
         As an example, the following could be passed for Zstandard decompression
         using a custom compression dictionary:
         ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
-
-        .. versionadded:: 1.5.0
-            Added support for `.tar` files.
     storage_options : dict, optional
         Extra options that make sense for a particular storage connection, e.g.
         host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
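The retained ``protocol`` text still documents pickle's own semantics: a negative value selects ``HIGHEST_PROTOCOL``. A small illustration using nothing beyond pandas and the standard library:

    import os
    import pickle
    import tempfile

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})

    with tempfile.TemporaryDirectory() as tmp:
        explicit = os.path.join(tmp, "explicit.pkl")
        negative = os.path.join(tmp, "negative.pkl")
        df.to_pickle(explicit, protocol=pickle.HIGHEST_PROTOCOL)
        # Documented as equivalent to HIGHEST_PROTOCOL.
        df.to_pickle(negative, protocol=-1)
        assert pd.read_pickle(explicit).equals(pd.read_pickle(negative))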
From ad74154751c008b51adca366ed8075b8c9cba76d Mon Sep 17 00:00:00 2001
From: sdhjebngc <2224531525@qq.com>
Date: Sun, 21 Dec 2025 11:45:06 +0800
Subject: [PATCH 4/5] DOC: Replace @doc decorator with inlined docstrings in pandas/io/parquet.py

---
 pandas/io/parquet.py | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 878f51a2b9eac..d95a79ee9247c 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -21,17 +21,13 @@
     AbstractMethodError,
     Pandas4Warning,
 )
-from pandas.util._decorators import (
-    doc,
-    set_module,
-)
+from pandas.util._decorators import set_module
 from pandas.util._validators import check_dtype_backend

 from pandas import (
     DataFrame,
     get_option,
 )
-from pandas.core.shared_docs import _shared_docs

 from pandas.io._util import arrow_table_to_pandas
 from pandas.io.common import (
@@ -410,7 +406,6 @@ def read(
             handles.close()


-@doc(storage_options=_shared_docs["storage_options"])
 def to_parquet(
     df: DataFrame,
     path: FilePath | WriteBuffer[bytes] | None = None,
@@ -430,10 +425,10 @@ def to_parquet(
     df : DataFrame
     path : str, path object, file-like object, or None, default None
         String, path object (implementing ``os.PathLike[str]``), or file-like
-        object implementing a binary ``write()`` function. If None, the result is
-        returned as bytes. If a string, it will be used as Root Directory path
-        when writing a partitioned dataset. The engine fastparquet does not
-        accept file-like objects.
+        object implementing a binary ``write()`` function. If None, the result
+        is returned as bytes. If a string, it will be used as Root Directory
+        path when writing a partitioned dataset. The engine fastparquet does
+        not accept file-like objects.
     engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
         Parquet library to use. If 'auto', then the option
         ``io.parquet.engine`` is used. The default ``io.parquet.engine``
@@ -460,8 +455,15 @@ def to_parquet(
         Column names by which to partition the dataset.
         Columns are partitioned in the order they are given.
         Must be None if path is not a string.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.
     filesystem : fsspec or pyarrow filesystem, default None
         Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.
@@ -504,7 +506,6 @@ def to_parquet(


 @set_module("pandas")
-@doc(storage_options=_shared_docs["storage_options"])
 def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
@@ -547,8 +548,15 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
-    {storage_options}
-
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?highlight=storage_options#reading-writing-remote-files>`_.
     dtype_backend : {{'numpy_nullable', 'pyarrow'}}
         Back-end data type applied to the resultant :class:`DataFrame`
         (still experimental). If not specified, the default behavior
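The two ``path`` behaviours inlined above (raw bytes when ``path=None``, a root directory when partitioning) can be sketched as follows. This assumes ``pyarrow`` is installed, since per the docstring ``fastparquet`` does not accept file-like objects:

    import os
    import tempfile

    import pandas as pd

    df = pd.DataFrame({"year": [2023, 2023, 2024], "value": [1.0, 2.0, 3.0]})

    # path=None returns the serialized parquet file as bytes.
    blob = df.to_parquet(path=None, engine="pyarrow")
    assert isinstance(blob, bytes)

    with tempfile.TemporaryDirectory() as tmp:
        root = os.path.join(tmp, "dataset")
        # A string path plus partition_cols makes the path the root
        # directory of a partitioned dataset (year=2023/, year=2024/).
        df.to_parquet(root, engine="pyarrow", partition_cols=["year"])
        assert {"year=2023", "year=2024"}.issubset(set(os.listdir(root)))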
starting with "s3://", and "gcs://") the + key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` + and ``urllib`` for more details, and for more examples on storage + options refer `here `_. dtype_backend : {{'numpy_nullable', 'pyarrow'}} Back-end data type applied to the resultant :class:`DataFrame` (still experimental). If not specified, the default behavior From ec4aeadb066151261edfad2779284bb2e78b419d Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Sun, 21 Dec 2025 12:08:09 +0800 Subject: [PATCH 5/5] FIX: Remove double curly braces in read_parquet doctest example --- pandas/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index d95a79ee9247c..218002ebb3f6a 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -617,7 +617,7 @@ def read_parquet( Examples -------- - >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) + >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) >>> original_df foo bar 0 0 5