Skip to content

Commit c6c1453

Browse files
ArrayRecord Team and copybara-github
authored and committed
Revert the last cpp protobuf update and GCS reader support.
The required riegeli version depends on a protobuf version that is incompatible with TF. Ported from #175. PiperOrigin-RevId: 829128090
1 parent d73af39 commit c6c1453

5 files changed

Lines changed: 41 additions & 34 deletions

File tree

MODULE.bazel

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,41 +13,34 @@
1313
# limitations under the License.
1414

1515
# TODO(fchern): automate version string alignment with setup.py
16-
VERSION = "0.8.2"
16+
VERSION = "0.8.0"
1717

1818
module(
1919
name = "array_record",
2020
version = VERSION,
2121
repo_name = "com_google_array_record",
2222
)
2323

24-
bazel_dep(name = "rules_proto", version = "7.0.2")
25-
bazel_dep(name = "rules_python", version = "1.6.0")
24+
bazel_dep(name = "rules_proto", version = "7.1.0")
25+
bazel_dep(name = "rules_python", version = "1.4.1")
2626
bazel_dep(name = "platforms", version = "0.0.11")
27-
28-
# We force the protobuf version to be 28.3 because 29.x (that is required by rules_python) causes
29-
# segfault when importing TensorFlow due to a conflict with TF's protobuf version.
30-
bazel_dep(name = "protobuf", version = "29.0-rc3")
31-
single_version_override(
32-
module_name = "protobuf",
33-
version = "28.3",
34-
)
35-
27+
bazel_dep(name = "protobuf", version = "31.1")
3628
bazel_dep(name = "googletest", version = "1.15.2")
37-
bazel_dep(name = "abseil-cpp", version = "20240722.0")
29+
bazel_dep(name = "abseil-cpp", version = "20250127.1")
3830
bazel_dep(name = "abseil-py", version = "2.1.0")
3931
bazel_dep(name = "eigen", version = "3.4.0.bcr.3")
40-
bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c")
32+
bazel_dep(name = "riegeli", version = "0.0.0-20250717-5b2e77e")
4133
bazel_dep(name = "pybind11_bazel", version = "2.12.0")
34+
bazel_dep(name = "google_cloud_cpp", version = "3.0.0-rc0")
4235

4336
SUPPORTED_PYTHON_VERSIONS = [
37+
"3.10",
4438
"3.11",
4539
"3.12",
4640
"3.13",
47-
"3.14",
4841
]
4942

50-
DEFAULT_PYTHON_VERSION = "3.13"
43+
DEFAULT_PYTHON_VERSION = "3.10"
5144

5245
python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension")
5346
use_repo(python_configure, "local_config_python")

oss/build_whl.sh

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ function main() {
2828
write_to_bazelrc "build --host_cxxopt=-std=c++17"
2929
write_to_bazelrc "build --experimental_repo_remote_exec"
3030
write_to_bazelrc "common --check_direct_dependencies=error"
31-
# Reduce noise during build.
32-
write_to_bazelrc "build --cxxopt=-Wno-deprecated-declarations --host_cxxopt=-Wno-deprecated-declarations"
33-
write_to_bazelrc "build --cxxopt=-Wno-parentheses --host_cxxopt=-Wno-parentheses"
34-
write_to_bazelrc "build --cxxopt=-Wno-sign-compare --host_cxxopt=-Wno-sign-compare"
35-
3631
PLATFORM="$(uname)"
3732

3833
if [ -n "${CROSSTOOL_TOP}" ]; then
@@ -95,11 +90,11 @@ function main() {
9590
$PYTHON_BIN -m pip install ${OUTPUT_DIR}/all_dist/array_record*.whl
9691
$PYTHON_BIN -c 'import array_record'
9792
$PYTHON_BIN -c 'from array_record.python import array_record_data_source'
98-
# TF does not have a Python 3.14 wheel yet.
99-
if (( "${PYTHON_MINOR_VERSION}" < 14 )); then
100-
$PYTHON_BIN -m pip install jax tensorflow>=2.20.0 grain --only-binary h5py
101-
$PYTHON_BIN oss/test_import_tensorflow.py
93+
# TF is not available on Python 3.13 and above.
94+
if [ "$(uname)" != "Darwin" ] && (( "${PYTHON_MINOR_VERSION}" < 13 )); then
95+
$PYTHON_BIN -m pip install jax tensorflow>=2.20.0 grain
10296
$PYTHON_BIN oss/test_import_grain.py
97+
$PYTHON_BIN oss/test_import_tensorflow.py
10398
fi
10499
}
105100

python/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ pybind_extension(
2020
"@riegeli//riegeli/base:initializer",
2121
"@riegeli//riegeli/bytes:fd_reader",
2222
"@riegeli//riegeli/bytes:fd_writer",
23+
"@riegeli//riegeli/gcs:gcs_object",
24+
"@riegeli//riegeli/gcs:gcs_reader",
2325
],
2426
)
2527

python/array_record_module.cc

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ limitations under the License.
2222
#include <vector>
2323

2424
#include "absl/status/status.h"
25+
#include "absl/strings/match.h"
2526
#include "absl/strings/str_format.h"
2627
#include "absl/strings/string_view.h"
2728
#include "cpp/array_record_reader.h"
@@ -34,6 +35,8 @@ limitations under the License.
3435
#include "riegeli/base/maker.h"
3536
#include "riegeli/bytes/fd_reader.h"
3637
#include "riegeli/bytes/fd_writer.h"
38+
#include "riegeli/gcs/gcs_object.h"
39+
#include "riegeli/gcs/gcs_reader.h"
3740

3841
namespace py = pybind11;
3942

@@ -50,10 +53,13 @@ PYBIND11_MODULE(array_record_module, m) {
5053
throw py::value_error(
5154
std::string(status_or_option.status().message()));
5255
}
56+
riegeli::FdWriterBase::Options file_writer_options;
57+
file_writer_options.set_buffer_size(size_t{16} << 20);
5358
// Release the GIL because IO is time consuming.
5459
py::gil_scoped_release scoped_release;
5560
return new array_record::ArrayRecordWriter(
56-
riegeli::Maker<riegeli::FdWriter>(path),
61+
riegeli::Maker<riegeli::FdWriter>(
62+
path, std::move(file_writer_options)),
5763
status_or_option.value());
5864
}),
5965
py::arg("path"), py::arg("options") = "")
@@ -84,18 +90,29 @@ PYBIND11_MODULE(array_record_module, m) {
8490
std::string(status_or_option.status().message()));
8591
}
8692
riegeli::FdReaderBase::Options file_reader_options;
93+
riegeli::GcsReader::Options gcs_reader_options;
8794
if (kwargs.contains("file_reader_buffer_size")) {
8895
auto file_reader_buffer_size =
8996
kwargs["file_reader_buffer_size"].cast<int64_t>();
9097
file_reader_options.set_buffer_size(file_reader_buffer_size);
98+
gcs_reader_options.set_buffer_size(file_reader_buffer_size);
9199
}
92100
// Release the GIL because IO is time consuming.
93101
py::gil_scoped_release scoped_release;
94-
return new array_record::ArrayRecordReader(
95-
riegeli::Maker<riegeli::FdReader>(
96-
path, std::move(file_reader_options)),
97-
status_or_option.value(),
98-
array_record::ArrayRecordGlobalPool());
102+
if (absl::StartsWith(path, "gs://")) {
103+
return new array_record::ArrayRecordReader(
104+
riegeli::Maker<riegeli::GcsReader>(
105+
google::cloud::storage::Client(),
106+
riegeli::GcsObject(path), std::move(gcs_reader_options)),
107+
status_or_option.value(),
108+
array_record::ArrayRecordGlobalPool());
109+
} else {
110+
return new array_record::ArrayRecordReader(
111+
riegeli::Maker<riegeli::FdReader>(
112+
path, std::move(file_reader_options)),
113+
status_or_option.value(),
114+
array_record::ArrayRecordGlobalPool());
115+
}
99116
}),
100117
py::arg("path"), py::arg("options") = "", R"(
101118
ArrayRecordReader for fast sequential or random access.

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,23 @@ def has_ext_modules(self):
3131

3232
setup(
3333
name='array_record',
34-
version='0.8.2',
34+
version='0.8.0',
3535
description='A file format that achieves a new frontier of IO efficiency',
3636
author='ArrayRecord team',
3737
author_email='no-reply@google.com',
3838
packages=find_packages(),
3939
include_package_data=True,
4040
package_data={'': ['*.so']},
41-
python_requires='>=3.11',
41+
python_requires='>=3.10',
4242
install_requires=REQUIRED_PACKAGES,
4343
extras_require={'beam': BEAM_EXTRAS, 'test': TEST_EXTRAS},
4444
url='https://github.com/google/array_record',
4545
license='Apache-2.0',
4646
classifiers=[
47+
'Programming Language :: Python :: 3.10',
4748
'Programming Language :: Python :: 3.11',
4849
'Programming Language :: Python :: 3.12',
4950
'Programming Language :: Python :: 3.13',
50-
'Programming Language :: Python :: 3.14',
5151
],
5252
zip_safe=False,
5353
distclass=BinaryDistribution,

0 commit comments

Comments
 (0)