diff --git a/docker/1.4-2-py311/base/Dockerfile.cpu b/docker/1.4-2-py311/base/Dockerfile.cpu
new file mode 100644
index 00000000..12ea0382
--- /dev/null
+++ b/docker/1.4-2-py311/base/Dockerfile.cpu
@@ -0,0 +1,97 @@
+ARG UBUNTU_VERSION=24.04
+ARG UBUNTU_IMAGE_DIGEST=b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b
+
+# Build stage for SQLite compilation
+FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} AS sqlite-builder
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential wget ca-certificates && \
+    cd /tmp && \
+    wget https://www.sqlite.org/2025/sqlite-autoconf-3500200.tar.gz && \
+    tar xzf sqlite-autoconf-3500200.tar.gz && \
+    cd sqlite-autoconf-3500200 && \
+    ./configure --prefix=/usr/local && \
+    make && \
+    make install && \
+    ldconfig && \
+    cd / && \
+    rm -rf /tmp/sqlite-autoconf-3500200 /tmp/sqlite-autoconf-3500200.tar.gz && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Main image
+FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}
+
+# Python series installed from the deadsnakes PPA; override with --build-arg PYTHON_VERSION=X.Y
+ARG PYTHON_VERSION=3.11
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install OS packages, the Apache Arrow C++ libraries and Python in one layer so
+# the apt caches removed at the end never persist in the image.
+# NOTE: build-essential and the *-dev packages stay in the final image.
+RUN apt-get update && \
+    apt-get -y upgrade && \
+    apt-get -y install --no-install-recommends \
+    curl git jq libatlas-base-dev nginx openjdk-8-jdk-headless unzip wget expat tzdata apparmor \
+    libgstreamer1.0-0 libxml2 libsqlite3-0 software-properties-common ca-certificates lsb-release \
+    build-essential linux-libc-dev && \
+    # Add the Apache Arrow apt repository matching this distro and codename
+    wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
+    apt-get install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
+    rm -f ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
+    apt-get update && \
+    apt-get install -y -V libarrow-dev=17.0.0-1 libarrow-dataset-dev=17.0.0-1 libparquet-dev=17.0.0-1 libarrow-acero-dev=17.0.0-1 && \
+    # Add deadsnakes PPA for the requested Python version
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get -y install --no-install-recommends \
+    python${PYTHON_VERSION} python${PYTHON_VERSION}-distutils python${PYTHON_VERSION}-dev && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
+    # Create python symlink for backward compatibility
+    ln -sf /usr/bin/python3 /usr/bin/python && \
+    # Download get-pip.py to a file first so a failed download aborts the build
+    curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
+    python3 /tmp/get-pip.py && \
+    rm -f /tmp/get-pip.py && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \
+    dpkg-reconfigure --frontend noninteractive tzdata
+
+# Install uv for fast Python package management (installer saved to a file, not piped)
+RUN curl -LsSf -o /tmp/uv-install.sh https://astral.sh/uv/install.sh && \
+    sh /tmp/uv-install.sh && \
+    rm -f /tmp/uv-install.sh && \
+    mv /root/.local/bin/uv /usr/local/bin/uv
+
+ENV PATH=/usr/local/bin:${PATH}
+ENV PIP_ROOT_USER_ACTION=ignore
+
+# Copy compiled SQLite from builder stage
+COPY --from=sqlite-builder /usr/local/bin/sqlite3 /usr/local/bin/sqlite3
+COPY --from=sqlite-builder /usr/local/lib/libsqlite3.* /usr/local/lib/
+COPY --from=sqlite-builder /usr/local/include/sqlite3*.h /usr/local/include/
+
+# Register /usr/local/lib with the dynamic linker so the copied libsqlite3 is found
+RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/sqlite3.conf && \
+    ldconfig
+
+# This command will check the version and print it to the build logs
+RUN sqlite3 --version
+
+# Install awscli (bundle chosen by machine architecture: x86_64 or aarch64)
+RUN curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" && \
+    unzip awscliv2.zip && \
+    ./aws/install && \
+    rm -r aws awscliv2.zip
+
+# Python won't try to write .pyc or .pyo files on the import of source modules
+# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
+ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+# Install core scientific packages with exact versions
+RUN uv pip install --system --no-cache \
+    numpy==2.1.0 \
+    scikit-learn==1.4.2 \
+    pyarrow==17.0.0
diff --git a/docker/1.4-2/extension/Dockerfile.cpu b/docker/1.4-2-py311/extension/Dockerfile.cpu
similarity index 94%
rename from docker/1.4-2/extension/Dockerfile.cpu
rename to docker/1.4-2-py311/extension/Dockerfile.cpu
index 8550725e..4ff3b08d 100644
--- a/docker/1.4-2/extension/Dockerfile.cpu
+++ b/docker/1.4-2-py311/extension/Dockerfile.cpu
@@ -1,4 +1,4 @@
-FROM preprod-sklearn:1.4-2
+FROM preprod-sklearn:1.4-2-py311
 
 RUN pip freeze | grep -q 'scikit-learn==1.4.2'; \
     if [ $? -eq 0 ]; \
diff --git a/docker/1.4-2/extension/README.md b/docker/1.4-2-py311/extension/README.md
similarity index 100%
rename from docker/1.4-2/extension/README.md
rename to docker/1.4-2-py311/extension/README.md
diff --git a/docker/1.4-2/final/Dockerfile.cpu b/docker/1.4-2-py311/final/Dockerfile.cpu
similarity index 89%
rename from docker/1.4-2/final/Dockerfile.cpu
rename to docker/1.4-2-py311/final/Dockerfile.cpu
index bd7148c9..ec648aa5 100644
--- a/docker/1.4-2/final/Dockerfile.cpu
+++ b/docker/1.4-2-py311/final/Dockerfile.cpu
@@ -1,5 +1,5 @@
-FROM sklearn-base:1.4-2
-ENV SAGEMAKER_SKLEARN_VERSION 1.4-2
+FROM sklearn-base:1.4-2-py311
+ENV SAGEMAKER_SKLEARN_VERSION=1.4-2-py311
 ENV PIP_ROOT_USER_ACTION=ignore
 
 LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
@@ -9,8 +9,8 @@ COPY requirements.txt /requirements.txt
 RUN uv pip install --system -r /requirements.txt && \
     rm /requirements.txt
 
-# Fix Python 3.10 compatibility for sagemaker-containers
-RUN python3 -c "import sys; import os; site_packages = '/usr/local/lib/python3.10/dist-packages'; mapping_file = os.path.join(site_packages, 'sagemaker_containers/_mapping.py'); exec('if os.path.exists(mapping_file):\\n with open(mapping_file, \"r\") as
f:\\n content = f.read()\\n content = content.replace(\"collections.Mapping\", \"collections.abc.Mapping\")\\n with open(mapping_file, \"w\") as f:\\n f.write(content)')"
+# Rewrite collections.Mapping -> collections.abc.Mapping in sagemaker_containers/_mapping.py, located via the import system (no hard-coded pythonX.Y path); no-op if the file is absent
+RUN python3 -c "import importlib.util, pathlib; spec = importlib.util.find_spec('sagemaker_containers'); target = pathlib.Path(spec.origin).with_name('_mapping.py') if spec and spec.origin else None; target and target.exists() and target.write_text(target.read_text().replace('collections.Mapping', 'collections.abc.Mapping'))"
 
 COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl
 RUN uv pip install --system --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \
diff --git a/docker/1.4-2/resources/libffi7_3.3-6_arm64.deb b/docker/1.4-2-py311/resources/libffi7_3.3-6_arm64.deb
similarity index 100%
rename from docker/1.4-2/resources/libffi7_3.3-6_arm64.deb
rename to docker/1.4-2-py311/resources/libffi7_3.3-6_arm64.deb
diff --git a/docker/1.4-2/resources/mms/ExecutionParameters.java b/docker/1.4-2-py311/resources/mms/ExecutionParameters.java
similarity index 100%
rename from docker/1.4-2/resources/mms/ExecutionParameters.java
rename to docker/1.4-2-py311/resources/mms/ExecutionParameters.java
diff --git a/docker/1.4-2/resources/mms/config.properties.tmp b/docker/1.4-2-py311/resources/mms/config.properties.tmp
similarity index 100%
rename from docker/1.4-2/resources/mms/config.properties.tmp
rename to docker/1.4-2-py311/resources/mms/config.properties.tmp
diff --git a/docker/1.4-2/resources/mms/endpoints-1.0.jar
b/docker/1.4-2-py311/resources/mms/endpoints-1.0.jar similarity index 100% rename from docker/1.4-2/resources/mms/endpoints-1.0.jar rename to docker/1.4-2-py311/resources/mms/endpoints-1.0.jar diff --git a/docker/1.4-2/base/Dockerfile.cpu b/docker/1.4-2/base/Dockerfile.cpu deleted file mode 100644 index 24597f46..00000000 --- a/docker/1.4-2/base/Dockerfile.cpu +++ /dev/null @@ -1,193 +0,0 @@ -ARG UBUNTU_VERSION=24.04 -ARG UBUNTU_IMAGE_DIGEST=b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b - -# Build stage for SQLite compilation -FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} as sqlite-builder -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential wget ca-certificates && \ - cd /tmp && \ - wget https://www.sqlite.org/2025/sqlite-autoconf-3500200.tar.gz && \ - tar xzf sqlite-autoconf-3500200.tar.gz && \ - cd sqlite-autoconf-3500200 && \ - ./configure --prefix=/usr/local && \ - make && \ - make install && \ - ldconfig && \ - cd / && \ - rm -rf /tmp/sqlite-autoconf-3500200 /tmp/sqlite-autoconf-3500200.tar.gz && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# MLIO builder stage with Ubuntu 20.04 -FROM ubuntu:20.04@sha256:874aca52f79ae5f8258faff03e10ce99ae836f6e7d2df6ecd3da5c1cad3a912b as mlio-builder - -ARG PYTHON_VERSION=3.10 -ARG MLIO_VERSION=v0.9.0 -ARG PYARROW_VERSION=17.0.0 - -ENV DEBIAN_FRONTEND=noninteractive - -# Install uv and Python 3.10 -RUN apt-get update && \ - apt-get -y install --no-install-recommends \ - build-essential curl git wget ca-certificates lsb-release software-properties-common && \ - # Install uv - curl -LsSf https://astral.sh/uv/install.sh | sh && \ - mv /root/.local/bin/uv /usr/local/bin/uv && \ - # Install Python 3.10 with uv - uv python install 3.10 && \ - ln -sf $(uv python find 3.10) /usr/bin/python3.10 && \ - update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 - - # Add Apache Arrow repository (hardcoded for Ubuntu 20.04 focal) -RUN wget 
https://packages.apache.org/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-focal.deb && \ - apt install -y -V ./apache-arrow-apt-source-latest-focal.deb && \ - apt-get update && \ - apt-get install -y -V libarrow-dev=17.0.0-1 libarrow-dataset-dev=17.0.0-1 libparquet-dev=17.0.0-1 libarrow-acero-dev=17.0.0-1 && \ - # MLIO build dependencies - wget http://es.archive.ubuntu.com/ubuntu/pool/main/libf/libffi/libffi7_3.3-4_amd64.deb && \ - dpkg -i libffi7_3.3-4_amd64.deb && \ - apt-get -y install --no-install-recommends \ - apt-transport-https gnupg && \ - wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ - gpg --dearmor - | \ - tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ - echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ - apt-get update && \ - rm /usr/share/keyrings/kitware-archive-keyring.gpg && \ - apt-get install -y --no-install-recommends \ - autoconf automake cmake cmake-data doxygen kitware-archive-keyring libcurl4-openssl-dev libssl-dev libtool ninja-build zlib1g-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \ - dpkg-reconfigure --frontend noninteractive tzdata - -ENV PIP_ROOT_USER_ACTION=ignore - -# Build MLIO from scratch -# Clone MLIO repository -RUN cd /tmp && \ - git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio - -# Patch MLIO for Arrow 17.0.0 -RUN cd /tmp/mlio && \ - sed -i 's/find_package(Arrow 14.0.1 REQUIRED/find_package(Arrow 17.0.0 REQUIRED/g' CMakeLists.txt && \ - sed -i 's/pyarrow==14.0.1/pyarrow==17.0.0/g' src/mlio-py/setup.py - -# Build MLIO third-party dependencies (includes Arrow C++) -RUN cd /tmp/mlio && \ - build-tools/build-dependency build/third-party all - -# Configure MLIO build -RUN cd /tmp/mlio && \ - mkdir -p build/release && \ - cd build/release && \ - 
cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. - -# Build MLIO core -RUN cd /tmp/mlio/build/release && \ - cmake --build . && \ - cmake --build . --target install - -# Configure MLIO Python extension -RUN cd /tmp/mlio/build/release && \ - cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DPYTHON_EXECUTABLE="/usr/bin/python3" \ - -DMLIO_INCLUDE_ARROW_INTEGRATION=ON -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. - -# Build MLIO Python extension -RUN cd /tmp/mlio/build/release && \ - cmake --build . --target mlio-py && \ - cmake --build . --target mlio-arrow - -# Build MLIO Python wheel -RUN cd /tmp/mlio/src/mlio-py && \ - uv build - -# Copy TBB libraries and MLIO shared libraries to a location we can copy from -RUN mkdir -p /mlio-artifacts && \ - cp -r /tmp/mlio/build/third-party/lib/libtbb* /mlio-artifacts/ && \ - cp /usr/local/lib/libmlio* /mlio-artifacts/ 2>/dev/null || true && \ - cp /tmp/mlio/src/mlio-py/dist/*.whl /mlio-artifacts/ - -# Main image -FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} - -ARG PYTHON_VERSION=3.10 -ARG PYARROW_VERSION=17.0.0 - -ENV DEBIAN_FRONTEND=noninteractive - -# Install runtime dependencies only -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install --no-install-recommends \ - curl git jq libatlas-base-dev nginx openjdk-8-jdk-headless unzip wget expat tzdata apparmor \ - libgstreamer1.0-0 libxml2 libsqlite3-0 software-properties-common ca-certificates lsb-release \ - build-essential linux-libc-dev && \ - # Add Apache Arrow repository for runtime libraries only - wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt-get update && \ - apt-get install -y -V libarrow-dev=17.0.0-1 libarrow-dataset-dev=17.0.0-1 libparquet-dev=17.0.0-1 
libarrow-acero-dev=17.0.0-1 && \ - # Add deadsnakes PPA for Python 3.10 - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get -y install --no-install-recommends \ - python3.10 python3.10-distutils python3.10-dev && \ - update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \ - # Create python symlink for backward compatibility - ln -sf /usr/bin/python3 /usr/bin/python && \ - curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN ln -fs /usr/share/zoneinfo/UTC /etc/localtime && \ - dpkg-reconfigure --frontend noninteractive tzdata - -# Install uv for fast Python package management -RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ - mv /root/.local/bin/uv /usr/local/bin/uv - -ENV PATH=/usr/local/bin:${PATH} -ENV PIP_ROOT_USER_ACTION=ignore - -# Copy MLIO wheel, TBB libraries, and MLIO shared libraries from builder stage -COPY --from=mlio-builder /mlio-artifacts/*.whl /tmp/ -COPY --from=mlio-builder /mlio-artifacts/libtbb* /usr/local/lib/ -COPY --from=mlio-builder /mlio-artifacts/libmlio* /usr/local/lib/ - -# Install MLIO wheel -RUN uv pip install --system /tmp/*.whl && \ - rm /tmp/*.whl - -# Copy compiled SQLite from builder stage -COPY --from=sqlite-builder /usr/local/bin/sqlite3 /usr/local/bin/sqlite3 -COPY --from=sqlite-builder /usr/local/lib/libsqlite3.* /usr/local/lib/ -COPY --from=sqlite-builder /usr/local/include/sqlite3*.h /usr/local/include/ - -# Update library cache and ensure /usr/local/bin is in PATH -RUN ldconfig && \ - echo "/usr/local/lib" > /etc/ld.so.conf.d/sqlite3.conf && \ - ldconfig - -ENV PATH="/usr/local/bin:${PATH}" - -# This command will check the version and print it to the build logs -RUN sqlite3 --version - -# Install awscli -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ - unzip awscliv2.zip && \ - ./aws/install && \ - rm -r aws awscliv2.zip - -# Python won't try to write .pyc 
or .pyo files on the import of source modules -# Force stdin, stdout and stderr to be totally unbuffered. Good for logging -ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 - -# Install core scientific packages with exact versions -RUN uv pip install --system --no-cache \ - numpy==2.1.0 \ - scikit-learn==1.4.2 \ - pyarrow==17.0.0
diff --git a/pyproject.toml b/pyproject.toml
index af835272..4863ad5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,15 +2,15 @@
 name = "sagemaker-sklearn-container"
 version = "2.0"
 description = "SageMaker Scikit-learn Container"
-requires-python = "==3.10.*"
-license = "Apache-2.0"
+requires-python = "==3.11.*"
+license = {text = "Apache-2.0"}
 authors = [{name = "Amazon Web Services"}]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
     "Natural Language :: English",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
 ]
 
 dynamic = ["readme", "dependencies", "optional-dependencies"]
diff --git a/setup.py b/setup.py
index 5be88421..b9db9368 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,7 @@ def read(fname):
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
     ],
 
     install_requires=read("requirements.txt"),
@@ -47,5 +48,5 @@ def read(fname):
         'console_scripts': 'serve=sagemaker_sklearn_container.serving:serving_entrypoint'
     },
 
-    python_requires='>=3.10',
+    python_requires='>=3.11',
 )
diff --git a/tox.ini b/tox.ini
index 1613ed13..92232520 100644
--- a/tox.ini
+++ b/tox.ini
@@ -11,13 +11,11 @@ deps =
     -r{toxinidir}/test-requirements.txt
 conda_deps=
     pyarrow=14.0.1
-    mlio-py=0.9
 conda_channels=
     conda-forge
-    mlio
 commands =
     pytest --cov=sagemaker_sklearn_container --cov-fail-under=60 test/unit
 
 [testenv:flake8]
 deps = flake8
 commands = flake8 setup.py src test