diff --git a/.env b/.env index 6985926772d..d00eefdb8df 100644 --- a/.env +++ b/.env @@ -99,8 +99,8 @@ VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-07 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-07 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-13 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-13 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 59171ddcaae..d8ee21761a5 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit openssl - pip install "cython>=3.1" setuptools pytest requests setuptools-scm + pip install build "cython>=3.1" pytest requests scikit-build-core setuptools-scm - name: Run Release Test shell: bash run: | diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 33ac193f86e..dd16d66b725 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -29,5 +29,5 @@ numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools>=77 +scikit-build-core setuptools_scm>=8 diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index e10766ef37e..1e20d54bf16 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -136,6 +136,10 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel +%PYTHON_CMD% -c "import scikit_build_core; print(scikit_build_core.__version__)" +%PYTHON_CMD% -c "import sys; print(sys.executable); print(sys.path)" +%PYTHON_CMD% -m pip list +%PYTHON_CMD% -m pip show 
scikit-build-core %PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1 @REM Repair the wheel with delvewheel diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index fe2c7b77591..2e6d8603058 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -60,7 +60,7 @@ def test_symbolic_links def test_python_version source Dir.chdir("#{@tag_name_no_rc}/python") do - sh("python3", "setup.py", "sdist") + sh("python", "-m", "build", "--sdist") if on_release_branch? pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" else diff --git a/python/.gitignore b/python/.gitignore index ce97ba4af62..858c983d49c 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -44,7 +44,3 @@ manylinux1/arrow nm_arrow.log visible_symbols.log -# the purpose of the custom SDist class in setup.py is to include these files -# in the sdist tarball, but we don't want to track duplicates -LICENSE.txt -NOTICE.txt diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f99225284a8..31ce2f149ea 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -352,7 +352,21 @@ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) # Write out compile-time configuration constants -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_PYBUILD_TYPE) +if(CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_PYBUILD_TYPE) +else() + # For multi-config generators (Xcode and Visual Studio), + # CMAKE_BUILD_TYPE is not set at configure time. + # scikit-build-core does the right thing with cmake.build-type and + # adds the corresponding --config but does not populate CMAKE_BUILD_TYPE + # for those. In this specific case, we set the default to "RELEASE" + # as it's the most common build type for users building from source. + # This is mainly relevant for our Windows wheels, which are built with + # Visual Studio and thus use a multi-config generator with Release. 
+ # Note that this value is only used to populate config_internal.h. + set(UPPERCASE_PYBUILD_TYPE "RELEASE") +endif() + configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake" "${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES) diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 120000 index 00000000000..4ab43736a83 --- /dev/null +++ b/python/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/python/NOTICE.txt b/python/NOTICE.txt new file mode 120000 index 00000000000..eb9f24e040b --- /dev/null +++ b/python/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml index 217dba81b87..0d5e04843ad 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,16 +17,14 @@ [build-system] requires = [ + "scikit-build-core", "cython >= 3.1", # Needed for build-time stub docstring extraction "libcst>=1.8.6", "numpy>=1.25", - # configuring setuptools_scm in pyproject.toml requires - # versions released after 2022 "setuptools_scm[toml]>=8", - "setuptools>=77", ] -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "pyarrow" @@ -81,16 +79,18 @@ exclude = [ '\._.*$', ] -[tool.setuptools] -zip-safe=false -include-package-data=true +[tool.scikit-build] +cmake.build-type = "Release" +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] +wheel.packages = ["pyarrow"] +wheel.install-dir = "pyarrow" -[tool.setuptools.packages.find] -include = ["pyarrow"] -namespaces = false - -[tool.setuptools.package-data] -pyarrow = ["*.pxd", "*.pyi", "*.pyx", "includes/*.pxd", "py.typed"] +[tool.scikit-build.cmake.define] +PYARROW_BUNDLE_ARROW_CPP = {env = "PYARROW_BUNDLE_ARROW_CPP", default = "OFF"} +PYARROW_BUNDLE_CYTHON_CPP = {env = "PYARROW_BUNDLE_CYTHON_CPP", default = "OFF"} +PYARROW_GENERATE_COVERAGE = {env = 
"PYARROW_GENERATE_COVERAGE", default = "OFF"} +PYARROW_CXXFLAGS = {env = "PYARROW_CXXFLAGS", default = ""} [tool.setuptools_scm] root = '..' diff --git a/python/requirements-build.txt b/python/requirements-build.txt index c3b7aa48eb6..2e8e8e774ec 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,5 +1,5 @@ cython>=3.1 libcst>=1.8.6 numpy>=1.25 +scikit-build-core setuptools_scm>=8 -setuptools>=77 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index 6a2c6221243..a3ac1396772 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -3,6 +3,6 @@ cython>=3.1 # Needed for build-time stub docstring extraction libcst>=1.8.6 numpy>=2.0.0 +scikit-build-core setuptools_scm -setuptools>=77 wheel diff --git a/python/setup.py b/python/setup.py deleted file mode 100755 index 4f2bf7585e1..00000000000 --- a/python/setup.py +++ /dev/null @@ -1,480 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import contextlib -import os -import os.path -from os.path import join as pjoin -import re -import shlex -import shutil -import sys -import warnings - -if sys.version_info >= (3, 10): - import sysconfig -else: - # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825) - from distutils import sysconfig - -from setuptools import setup, Extension, Distribution -from setuptools.command.sdist import sdist - -from Cython.Distutils import build_ext as _build_ext -import Cython - -# Check if we're running 64-bit Python -is_64_bit = sys.maxsize > 2**32 - -# We can't use sys.platform in a cross-compiling situation -# as here it may be set to the host not target platform -is_emscripten = ( - sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 -) - - -if Cython.__version__ < '3.1': - raise Exception( - 'Please update your Cython version. Supported Cython >= 3.1') - -setup_dir = os.path.abspath(os.path.dirname(__file__)) - -ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') - - -@contextlib.contextmanager -def changed_dir(dirname): - oldcwd = os.getcwd() - os.chdir(dirname) - try: - yield - finally: - os.chdir(oldcwd) - - -def strtobool(val): - """Convert a string representation of truth to true (1) or false (0). - - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if - 'val' is anything else. - """ - # Copied from distutils - val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): - return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): - return 0 - else: - raise ValueError("invalid truth value %r" % (val,)) - - -MSG_DEPR_SETUP_BUILD_FLAGS = """ - !! - - *********************************************************************** - The '{}' flag is being passed to setup.py, but this is - deprecated. 
- - If a certain component is available in Arrow C++, it will automatically - be enabled for the PyArrow build as well. If you want to force the - build of a certain component, you can still use the - PYARROW_WITH_$COMPONENT environment variable. - *********************************************************************** - - !! -""" - - -class build_ext(_build_ext): - _found_names = () - - def build_extensions(self): - import numpy - numpy_incl = numpy.get_include() - - self.extensions = [ext for ext in self.extensions - if ext.name != '__dummy__'] - - for ext in self.extensions: - if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) - _build_ext.build_extensions(self) - - def run(self): - self._run_cmake() - self._update_stubs() - _build_ext.run(self) - - def _update_stubs(self): - """Copy stubs to build directory, then inject docstrings into the copies.""" - stubs_dir = pjoin(setup_dir, 'pyarrow-stubs') - if not os.path.exists(stubs_dir): - return - - build_cmd = self.get_finalized_command('build') - build_lib = os.path.abspath(build_cmd.build_lib) - - # Copy clean stubs to build directory first - self._copy_stubs(stubs_dir, build_lib) - - # Inject docstrings into the build copies (not the source stubs). - # We pass build_lib as stubs_dir since it mirrors the pyarrow-stubs/ - # directory structure (both contain a pyarrow/ subdirectory with .pyi - # files), so the namespace resolution logic works identically. 
- import importlib.util - spec = importlib.util.spec_from_file_location( - "update_stub_docstrings", - pjoin(setup_dir, 'scripts', 'update_stub_docstrings.py')) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - mod.add_docstrings_from_build(build_lib, build_lib) - - def _copy_stubs(self, stubs_dir, build_lib): - """Copy .pyi stub files to the build directory.""" - src_dir = pjoin(stubs_dir, 'pyarrow') - dest_dir = pjoin(build_lib, 'pyarrow') - - if not os.path.exists(src_dir): - return - - print(f"-- Copying stubs: {src_dir} -> {dest_dir}") - for root, dirs, files in os.walk(src_dir): - for fname in files: - if fname.endswith('.pyi'): - src = pjoin(root, fname) - rel_path = os.path.relpath(src, src_dir) - dest = pjoin(dest_dir, rel_path) - os.makedirs(os.path.dirname(dest), exist_ok=True) - shutil.copy2(src, dest) - - # adapted from cmake_build_ext in dynd-python - # github.com/libdynd/dynd-python - - description = "Build the C-extensions for arrow" - user_options = ([('cmake-generator=', None, 'CMake generator'), - ('extra-cmake-args=', None, 'extra arguments for CMake'), - ('build-type=', None, - 'build type (debug or release), default release'), - ('boost-namespace=', None, - 'namespace of boost (default: boost)'), - ('with-cuda', None, 'build the Cuda extension'), - ('with-flight', None, 'build the Flight extension'), - ('with-substrait', None, 'build the Substrait extension'), - ('with-acero', None, 'build the Acero Engine extension'), - ('with-dataset', None, 'build the Dataset extension'), - ('with-parquet', None, 'build the Parquet extension'), - ('with-parquet-encryption', None, - 'build the Parquet encryption extension'), - ('with-azure', None, - 'build the Azure Blob Storage extension'), - ('with-gcs', None, - 'build the Google Cloud Storage (GCS) extension'), - ('with-s3', None, 'build the Amazon S3 extension'), - ('with-static-parquet', None, 'link parquet statically'), - ('with-static-boost', None, 'link boost 
statically'), - ('with-orc', None, 'build the ORC extension'), - ('with-gandiva', None, 'build the Gandiva extension'), - ('generate-coverage', None, - 'enable Cython code coverage'), - ('bundle-boost', None, - 'bundle the (shared) Boost libraries'), - ('bundle-cython-cpp', None, - 'bundle generated Cython C++ code ' - '(used for code coverage)'), - ('bundle-arrow-cpp', None, - 'bundle the Arrow C++ libraries'), - ('bundle-arrow-cpp-headers', None, - 'bundle the Arrow C++ headers')] + - _build_ext.user_options) - - def initialize_options(self): - _build_ext.initialize_options(self) - self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR') - if not self.cmake_generator and sys.platform == 'win32': - self.cmake_generator = 'Visual Studio 15 2017 Win64' - self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '') - self.build_type = os.environ.get('PYARROW_BUILD_TYPE', - 'release').lower() - - self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '') - - if sys.platform == 'win32': - # Cannot do debug builds in Windows unless Python itself is a debug - # build - if not hasattr(sys, 'gettotalrefcount'): - self.build_type = 'release' - - self.with_azure = None - self.with_gcs = None - self.with_s3 = None - self.with_hdfs = None - self.with_cuda = None - self.with_substrait = None - self.with_flight = None - self.with_acero = None - self.with_dataset = None - self.with_parquet = None - self.with_parquet_encryption = None - self.with_orc = None - self.with_gandiva = None - - self.generate_coverage = strtobool( - os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) - self.bundle_arrow_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) - self.bundle_cython_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - - CYTHON_MODULE_NAMES = [ - 'lib', - '_fs', - '_csv', - '_json', - '_compute', - '_cuda', - '_flight', - '_dataset', - '_dataset_orc', - '_dataset_parquet', - '_acero', - '_feather', - '_parquet', - 
'_parquet_encryption', - '_pyarrow_cpp_tests', - '_orc', - '_azurefs', - '_gcsfs', - '_s3fs', - '_substrait', - '_hdfs', - 'gandiva'] - - def _run_cmake(self): - # check if build_type is correctly passed / set - if self.build_type.lower() not in ('release', 'debug', - 'relwithdebinfo'): - raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " - "be 'release', 'debug' or 'relwithdebinfo'") - - # The directory containing this setup.py - source = os.path.dirname(os.path.abspath(__file__)) - - # The staging directory for the module being built - build_cmd = self.get_finalized_command('build') - saved_cwd = os.getcwd() - build_temp = pjoin(saved_cwd, build_cmd.build_temp) - build_lib = pjoin(saved_cwd, build_cmd.build_lib) - - if not os.path.isdir(build_temp): - self.mkpath(build_temp) - - if self.inplace: - # a bit hacky - build_lib = saved_cwd - - install_prefix = pjoin(build_lib, "pyarrow") - - # Change to the build directory - with changed_dir(build_temp): - # Detect if we built elsewhere - if os.path.isfile('CMakeCache.txt'): - cachefile = open('CMakeCache.txt', 'r') - cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) - cachefile.close() - if (cachedir != build_temp): - build_base = pjoin(saved_cwd, build_cmd.build_base) - print(f"-- Skipping build. 
Temp build {build_temp} does " - f"not match cached dir {cachedir}") - print("---- For a clean build you might want to delete " - f"{build_base}.") - return - - cmake_options = [ - f'-DCMAKE_INSTALL_PREFIX={install_prefix}', - f'-DPYTHON_EXECUTABLE={sys.executable}', - f'-DPython3_EXECUTABLE={sys.executable}', - f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}', - ] - - def append_cmake_bool(value, varname): - cmake_options.append(f'-D{varname}={"on" if value else "off"}') - - def append_cmake_component(flag, varname): - # only pass this to cmake if the user pass the --with-component - # flag to setup.py build_ext - if flag is not None: - flag_name = ( - "--with-" - + varname.removeprefix("PYARROW_").lower().replace("_", "-")) - warnings.warn( - MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name), - UserWarning, stacklevel=2 - ) - append_cmake_bool(flag, varname) - - if self.cmake_generator: - cmake_options += ['-G', self.cmake_generator] - - append_cmake_component(self.with_cuda, 'PYARROW_CUDA') - append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') - append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') - append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') - append_cmake_component(self.with_acero, 'PYARROW_ACERO') - append_cmake_component(self.with_dataset, 'PYARROW_DATASET') - append_cmake_component(self.with_orc, 'PYARROW_ORC') - append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') - append_cmake_component(self.with_parquet_encryption, - 'PYARROW_PARQUET_ENCRYPTION') - append_cmake_component(self.with_azure, 'PYARROW_AZURE') - append_cmake_component(self.with_gcs, 'PYARROW_GCS') - append_cmake_component(self.with_s3, 'PYARROW_S3') - append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') - - append_cmake_bool(self.bundle_arrow_cpp, - 'PYARROW_BUNDLE_ARROW_CPP') - append_cmake_bool(self.bundle_cython_cpp, - 'PYARROW_BUNDLE_CYTHON_CPP') - append_cmake_bool(self.generate_coverage, - 'PYARROW_GENERATE_COVERAGE') - - cmake_options.append( - 
f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}') - - extra_cmake_args = shlex.split(self.extra_cmake_args) - - build_tool_args = [] - if sys.platform == 'win32': - if not is_64_bit: - raise RuntimeError('Not supported on 32-bit Windows') - else: - build_tool_args.append('--') - if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1': - cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON') - parallel = os.environ.get('PYARROW_PARALLEL') - if parallel: - build_tool_args.append(f'-j{parallel}') - - # Generate the build files - if is_emscripten: - print("-- Running emcmake cmake for PyArrow on Emscripten") - self.spawn(['emcmake', 'cmake'] + extra_cmake_args + - cmake_options + [source]) - else: - print("-- Running cmake for PyArrow") - self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) - - print("-- Finished cmake for PyArrow") - - print("-- Running cmake --build for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - build_tool_args) - print("-- Finished cmake --build for PyArrow") - - print("-- Running cmake --build --target install for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - ['--target', 'install'] + build_tool_args) - print("-- Finished cmake --build --target install for PyArrow") - - self._found_names = [] - for name in self.CYTHON_MODULE_NAMES: - built_path = pjoin(install_prefix, name + ext_suffix) - if os.path.exists(built_path): - self._found_names.append(name) - - def _get_build_dir(self): - # Get the package directory from build_py - build_py = self.get_finalized_command('build_py') - return build_py.get_package_dir('pyarrow') - - def _get_cmake_ext_path(self, name): - # This is the name of the arrow C-extension - filename = name + ext_suffix - return pjoin(self._get_build_dir(), filename) - - def get_ext_generated_cpp_source(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + ".cpp") - else: - return pjoin(name + 
".cpp") - - def get_ext_built_api_header(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + "_api.h") - else: - return pjoin(name + "_api.h") - - def get_names(self): - return self._found_names - - def get_outputs(self): - # Just the C extensions - # regular_exts = _build_ext.get_outputs(self) - return [self._get_cmake_ext_path(name) - for name in self.get_names()] - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -class CopyLicenseSdist(sdist): - """Custom sdist command that copies license files from parent directory.""" - - def make_release_tree(self, base_dir, files): - # Call parent to do the normal work - super().make_release_tree(base_dir, files) - - # Define source (parent dir) and destination (sdist root) for license files - license_files = [ - ("LICENSE.txt", "../LICENSE.txt"), - ("NOTICE.txt", "../NOTICE.txt"), - ] - - for dest_name, src_path in license_files: - src_full = os.path.join(os.path.dirname(__file__), src_path) - dest_full = os.path.join(base_dir, dest_name) - - # Remove any existing file/symlink at destination - if os.path.exists(dest_full) or os.path.islink(dest_full): - os.unlink(dest_full) - - if not os.path.exists(src_full): - msg = f"Required license file not found: {src_full}" - raise FileNotFoundError(msg) - - shutil.copy2(src_full, dest_full) - print(f"Copied {src_path} to {dest_name} in sdist") - - -setup( - distclass=BinaryDistribution, - # Dummy extension to trigger build_ext - ext_modules=[Extension('__dummy__', sources=[])], - cmdclass={ - 'build_ext': build_ext, - 'sdist': CopyLicenseSdist, - }, -)