diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index a09ac1a4e5ffb..9439d83fc0079 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -1,22 +1,27 @@ name: Set up Conda environment + inputs: environment-file: description: Conda environment file to use. default: environment.yml + runs: using: composite steps: - name: Install ${{ inputs.environment-file }} uses: mamba-org/setup-micromamba@v2 with: + micromamba-url: https://github.com/mamba-org/micromamba-releases/releases/latest/download/micromamba-linux-64 environment-file: ${{ inputs.environment-file }} environment-name: test condarc-file: ci/.condarc cache-environment: true cache-downloads: true + # avoid aggressive post-cleanup that can trigger JSON issues + post-cleanup: none - name: Uninstall pyarrow if: ${{ env.REMOVE_PYARROW == '1' }} - run: | - micromamba remove -y pyarrow shell: bash -el {0} + run: | + micromamba remove -y -n test pyarrow diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000..e535ff19b1896 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +- name: Download micromamba (with retries) + shell: bash + run: | + set -euo pipefail + RETRIES=5 + URL="https://micromamba.snakepit.net/api/micromamba/linux-64/latest" # linux binary endpoint + OUT="$RUNNER_TEMP/micromamba.tar.bz2" + i=0 + until [ $i -ge $RETRIES ] + do + echo "Attempt $((i+1))..." + if curl -fSL "$URL" -o "$OUT"; then + echo "Downloaded micromamba" + break + fi + i=$((i+1)) + sleep $((i * 2)) + done + if [ ! -f "$OUT" ]; then + echo "Failed to download micromamba after $RETRIES attempts" + exit 1 + fi + mkdir -p "$RUNNER_TEMP/micromamba" + tar -xjf "$OUT" -C "$RUNNER_TEMP/micromamba" --strip-components=1 + export MAMBA_ROOT_PREFIX="$RUNNER_TEMP/micromamba-root" + mkdir -p "$MAMBA_ROOT_PREFIX" + echo "Adding micromamba to PATH" + echo "$RUNNER_TEMP/micromamba/bin" >> $GITHUB_PATH + +- name: Create env with micromamba + run: | + micromamba create -y -n ci-env python=3.10 + micromamba activate ci-env + micromamba install -y -n ci-env -c conda-forge diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index 114f7b942cf05..7e01f00c7383a 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -42,7 +42,8 @@ jobs: - name: Set up Conda uses: ./.github/actions/setup-conda - + with: + cache-buster: ${{ github.sha }} - name: Build Pandas uses: ./.github/actions/build_pandas diff --git a/1.26.0 b/1.26.0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/2.8.2 b/2.8.2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/2023.3 b/2023.3 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/conda b/conda new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 070ad0bbe22ed..a3bd9160bc49e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -167,8 +167,38 @@ dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFram dtypes if "pyarrow" is set. The dtype_backends are still experimental. + .. versionadded:: 2.0 - .. versionadded:: 2.0 + +Google Colab +^^^^^^^^^^^^ + +Google Colab provides several methods to load data for :func:`read_csv` and similar functions. + +File upload ++++++++++++ + +.. ipython:: python + + from google.colab import files + # uploaded = files.upload() # Interactive in Colab + import io + import pandas as pd + # df = pd.read_csv(io.BytesIO(uploaded['example.csv'])) + +Google Drive +++++++++++++ + +.. ipython:: python + + from google.colab import drive + drive.mount('/content/drive') + df = pd.read_csv('/content/drive/MyDrive/example.csv') + +See `Google Colab IO notebook `_. + + + engine : {``'c'``, ``'python'``, ``'pyarrow'``} Parser engine to use. The C and pyarrow engines are faster, while the python engine