diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c2ed0b0..d8caef0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.3.8] - 2026-01-11 + +### Added +- tests: add missing pathlib abc tests (#511) +- tests: split base test cases into joinable/readable/writable tests (#507) +- docs: describe UPath/s3fs behavior with `is_dir()` (#503) + +### Fixed +- upath.implementations.cloud: fix S3Path copy to local with name collision of file/dir (#533) +- upath.core: fix behavior of `UPath.parent` and `UPath.parents` (#529) +- upath.implementations.github: adjust GitHubPath error reporting (#522) +- upath.implementations.cloud: fix error handling on HfPath (#521) +- upath.implementations.zip: disable write mode in universal-pathlib (#520) +- upath.implementations.tar: fix error handling for write methods (#519) +- upath.implementations.http: fix HTTPPath error handling for unsupported methods (#518) +- upath.implementations.data: fix DataPath error handling for unsupported methods (#517) +- upath.core: fix `touch()` method (#515) +- upath.extensions: fix `is_relative_to()` for extensions (#510) +- upath.extensions: fix error behavior for `hardlink_to()` backport and `symlink_to()` (#508) +- upath: fix `iterdir()` behavior when raising NotADirectoryError for all UPath subclasses (#506) +- tests: xfail on name resolution error in github suite (#523) +- tests: fix GitHub tests without a network connection (#509) + +### Changed +- upath: adjust behavior of `UPath.copy()` and `UPath.copy_into()` with str and Path targets (#530) +- upath.core: hand over cached fs instances in `with_segments()` (#516) +- tests: split test suite according to abcs and clean up tests (#513) +- tests: remove two unused helper functions introduced in #492 (#505) +- ci: do not hardcode Python executable name (#504) +- ci: update development dependencies (#501) + ## [0.3.7] - 2025-12-03 ### Added 
@@ -313,7 +344,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - started a changelog to keep track of significant changes -[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.3.7...HEAD +[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.3.8...HEAD +[0.3.8]: https://github.com/fsspec/universal_pathlib/compare/v0.3.7...v0.3.8 [0.3.7]: https://github.com/fsspec/universal_pathlib/compare/v0.3.6...v0.3.7 [0.3.6]: https://github.com/fsspec/universal_pathlib/compare/v0.3.5...v0.3.6 [0.3.5]: https://github.com/fsspec/universal_pathlib/compare/v0.3.4...v0.3.5 diff --git a/upath/core.py b/upath/core.py index d4c08a46..e9e5b67a 100644 --- a/upath/core.py +++ b/upath/core.py @@ -1190,15 +1190,26 @@ def iterdir(self) -> Iterator[Self]: base_path = base.path if not fs.isdir(base_path): raise NotADirectoryError(str(self)) - for name in fs.listdir(base_path): + name: str + for info in fs.listdir(base_path): # fsspec returns dictionaries - if isinstance(name, dict): - name = name.get("name") + if isinstance(info, dict): + name = info["name"] + else: + name, info = info, {} # type: ignore[assignment] + # skip the base path itself if a zero-length name + name = name.removesuffix(sep) + if ( + name == base_path + and info.get("size", -1) == 0 + and info.get("Key", None) == base_path + sep + ): + continue # skip empty names + # only want the path name with iterdir + _, _, name = name.rpartition(self.parser.sep) if name in {".", ".."}: # Yielding a path object for these makes little sense continue - # only want the path name with iterdir - _, _, name = name.removesuffix(sep).rpartition(self.parser.sep) yield base.with_segments(base_path, name) def __open_reader__(self) -> BinaryIO: diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index a7fcc5ad..d831b3af 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -213,3 
+213,59 @@ def on_collision_rename_file(src, dst): "src/common_prefix/file1.txt", "src/common_prefix/file2.txt", ] + + +@pytest.fixture +def s3_with_explicit_directory_marker(s3_server): + """issue #226: path.iterdir() yields path itself as the first item (with S3) + + Creates a bucket with an explicit directory marker (zero-byte object with + trailing slash) plus files. This simulates folders created via the AWS + Console, which creates actual zero-byte objects as directory markers. + + Given objects on S3: + s3://my-bucket/my-directory/ (zero-byte directory marker) + s3://my-bucket/my-directory/0.txt + s3://my-bucket/my-directory/1.txt + """ + import boto3 + + anon, s3so = s3_server + bucket = "iterdir_issue_226_marker_bucket" + s3_client = boto3.client("s3", **s3so["client_kwargs"]) + s3_client.create_bucket(Bucket=bucket) + # Create an explicit directory marker (zero-byte object with trailing slash) + # Use boto3 directly to ensure the trailing slash is preserved in the key + # This is what the AWS Console creates when you "Create folder" + s3_client.put_object(Bucket=bucket, Key="my-directory/", Body=b"") + # Then create files inside the directory + s3_client.put_object(Bucket=bucket, Key="my-directory/0.txt", Body=b"content 0") + s3_client.put_object(Bucket=bucket, Key="my-directory/1.txt", Body=b"content 1") + yield bucket, anon, s3so + + +@silence_botocore_datetime_deprecation +def test_iterdir_with_explicit_directory_marker__issue_226( + s3_with_explicit_directory_marker, +): + """issue #226: path.iterdir() yields path itself as the first item (with S3) + + See: https://github.com/fsspec/universal_pathlib/issues/226 + See: https://medium.com/cyberark-engineering/the-strange-case-of-amazon-s3-bucket-folders-c8d113a8dd01 # noqa: E501 + """ + bucket, anon, s3so = s3_with_explicit_directory_marker + directory_path = UPath(f"s3://{bucket}/my-directory", anon=anon, **s3so) + + children = list(directory_path.iterdir()) + + assert directory_path not in children + 
+ expected_files = { + directory_path / "0.txt", + directory_path / "1.txt", + } + assert set(children) == expected_files, ( + f"iterdir() should yield only the files in the directory. " + f"Expected: {[str(f) for f in expected_files]}, " + f"Got: {[str(c) for c in children]}" + )