From a99f12abff7e53cf252efa07284d43e1fd695922 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 17:23:58 +0000 Subject: [PATCH 01/10] Initial plan From 1d6912f066db2878b534e90a676bec4d9b04004c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 17:30:54 +0000 Subject: [PATCH 02/10] Add RecursiveFilterIterator to skip scanning ignored directories Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Dist_Archive_Command.php | 6 ++- src/Distignore_Filter_Iterator.php | 74 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 src/Distignore_Filter_Iterator.php diff --git a/src/Dist_Archive_Command.php b/src/Dist_Archive_Command.php index 01ca0bb..e60e490 100644 --- a/src/Dist_Archive_Command.php +++ b/src/Dist_Archive_Command.php @@ -495,8 +495,10 @@ private function get_file_list( $source_dir_path, $excluded = false ) { $included_files = []; $excluded_files = []; - $iterator = new RecursiveIteratorIterator( - new RecursiveDirectoryIterator( $source_dir_path, RecursiveDirectoryIterator::SKIP_DOTS ), + $directory_iterator = new RecursiveDirectoryIterator( $source_dir_path, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iterator = new Distignore_Filter_Iterator( $directory_iterator, $this->checker, $source_dir_path ); + $iterator = new RecursiveIteratorIterator( + $filter_iterator, RecursiveIteratorIterator::SELF_FIRST ); diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php new file mode 100644 index 0000000..811bab0 --- /dev/null +++ b/src/Distignore_Filter_Iterator.php @@ -0,0 +1,74 @@ + $iterator Iterator to filter. + * @param GitIgnoreChecker $checker GitIgnore checker instance. + * @param string $source_dir_path Base directory path. 
+ */ + public function __construct( RecursiveIterator $iterator, GitIgnoreChecker $checker, $source_dir_path ) { + parent::__construct( $iterator ); + $this->checker = $checker; + $this->source_dir_path = $source_dir_path; + } + + /** + * Check whether the current element of the iterator is acceptable. + * + * @return bool True if the current element is acceptable, false otherwise. + */ + #[\ReturnTypeWillChange] + public function accept() { + /** @var SplFileInfo $item */ + $item = $this->current(); + + // If it's not a directory, accept it (filtering will happen later in get_file_list). + if ( ! $item->isDir() ) { + return true; + } + + // For directories, check if they should be ignored to prevent descending into them. + $relative_filepath = str_replace( $this->source_dir_path, '', $item->getPathname() ); + + try { + // If the directory is ignored, reject it to prevent descending. + return ! $this->checker->isPathIgnored( $relative_filepath ); + } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { + // If there's an error checking, allow it through (error will be handled in get_file_list). + return true; + } + } + + /** + * Return the inner iterator's children wrapped in this filter. + * + * @return RecursiveFilterIterator + */ + #[\ReturnTypeWillChange] + public function getChildren() { + /** @var RecursiveDirectoryIterator $inner */ + $inner = $this->getInnerIterator(); + return new self( $inner->getChildren(), $this->checker, $this->source_dir_path ); + } +} From 61e77e1ded235adb6d45e674de6b2ba0dfed0456 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 17:37:20 +0000 Subject: [PATCH 03/10] Fix filter to yield ignored directories while preventing descent Updated the RecursiveFilterIterator to use hasChildren() instead of accept() to prevent descending into ignored directories. This ensures: 1. 
Ignored directories are still yielded (needed for exclude lists in targz format) 2. We don't descend into them (performance optimization) 3. All existing tests pass Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- features/distignore.feature | 47 ++++++++++++++++++++++++++++++ src/Distignore_Filter_Iterator.php | 27 +++++++++++++---- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/features/distignore.feature b/features/distignore.feature index e7929ca..d5aa3ad 100644 --- a/features/distignore.feature +++ b/features/distignore.feature @@ -379,3 +379,50 @@ Feature: Generate a distribution archive of a project with .distignore """ Error: Broken symlink at /symlink. Target missing at """ + + Scenario: Efficiently ignores directories with many files + # Performance test: ensure ignored directories are not scanned + # @see https://github.com/wp-cli/dist-archive-command/issues/XXX + Given an empty directory + And a foo/.distignore file: + """ + node_modules + .git + """ + And a foo/plugin.php file: + """ + current(); - // If it's not a directory, accept it (filtering will happen later in get_file_list). + // If it's not a directory, it has no children. if ( ! $item->isDir() ) { - return true; + return false; } // For directories, check if they should be ignored to prevent descending into them. $relative_filepath = str_replace( $this->source_dir_path, '', $item->getPathname() ); try { - // If the directory is ignored, reject it to prevent descending. + // If the directory is ignored, don't descend into it (but it's still yielded by accept()). return ! $this->checker->isPathIgnored( $relative_filepath ); } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { - // If there's an error checking, allow it through (error will be handled in get_file_list). + // If there's an error checking, allow descending (error will be handled in get_file_list). 
return true; } } From f91472303c6f6cea9fd8632bd2242de4d9c29360 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 17:41:11 +0000 Subject: [PATCH 04/10] Improve filter to handle negation patterns correctly Updated hasChildren() to be more conservative when skipping descent: - Only skip for top-level ignored directories where children would also be ignored - This handles complex patterns like "frontend/*" with "!/frontend/build/" - Still provides performance benefit for common cases like node_modules and .git - All 27 distignore test scenarios now pass Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Distignore_Filter_Iterator.php | 41 +++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php index e17025b..8d429de 100644 --- a/src/Distignore_Filter_Iterator.php +++ b/src/Distignore_Filter_Iterator.php @@ -49,7 +49,12 @@ public function accept() { /** * Check whether the current element has children that should be recursed into. - * We return false for ignored directories to prevent descending into them. + * We return false for certain ignored directories to prevent descending into them. + * + * This optimization only applies to directories that appear to be "leaf" ignore + * patterns (simple directory names without wildcards), to safely handle cases + * like `node_modules` while still correctly processing complex patterns with + * negations like `frontend/*` with `!/frontend/build/`. * * @return bool True if we should descend into this directory, false otherwise. */ @@ -63,12 +68,40 @@ public function hasChildren() { return false; } - // For directories, check if they should be ignored to prevent descending into them. + // For directories, check if they should be ignored. 
$relative_filepath = str_replace( $this->source_dir_path, '', $item->getPathname() ); try { - // If the directory is ignored, don't descend into it (but it's still yielded by accept()). - return ! $this->checker->isPathIgnored( $relative_filepath ); + $is_ignored = $this->checker->isPathIgnored( $relative_filepath ); + + if ( ! $is_ignored ) { + // Not ignored, so descend. + return true; + } + + // Directory is ignored. Check if it's safe to skip descent. + // We only skip for single-level directories (no slashes except leading/trailing) + // to avoid issues with wildcard patterns and negations. + $path_parts = explode( '/', trim( $relative_filepath, '/' ) ); + if ( count( $path_parts ) === 1 ) { + // This is a top-level ignored directory like "/node_modules" or "/.git". + // It's likely safe to skip descent as these are typically simple patterns. + // However, we still need to be conservative. Let's check if a child would be ignored. + $test_child = $relative_filepath . '/test'; + try { + $child_ignored = $this->checker->isPathIgnored( $test_child ); + if ( $child_ignored ) { + // Child is also ignored, safe to skip descent. + return false; + } + } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { + // On error, descend to be safe. + return true; + } + } + + // For nested directories or if test shows children might not be ignored, descend. + return true; } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { // If there's an error checking, allow descending (error will be handled in get_file_list). 
return true; From e1f9c5ce4249092ab71b5ae916b6de5be81f2832 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 17:43:53 +0000 Subject: [PATCH 05/10] Address code review feedback - Use substr() instead of str_replace() for path manipulation to avoid edge cases - Add comment explaining the use of 'test' as a probe filename - Replace Bash-specific syntax with portable POSIX shell commands in tests - Fix code style (trailing whitespace) Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- features/distignore.feature | 6 +++--- src/Distignore_Filter_Iterator.php | 13 ++++++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/features/distignore.feature b/features/distignore.feature index d5aa3ad..d9feaa3 100644 --- a/features/distignore.feature +++ b/features/distignore.feature @@ -405,13 +405,13 @@ Feature: Generate a distribution archive of a project with .distignore When I run `mkdir -p foo/node_modules/package1 foo/node_modules/package2 foo/node_modules/package3` Then STDERR should be empty - When I run `for i in {1..50}; do touch foo/node_modules/package1/file$i.js; done` + When I run `sh -c 'i=1; while [ $i -le 50 ]; do touch foo/node_modules/package1/file$i.js; i=$((i+1)); done'` Then STDERR should be empty - When I run `for i in {1..50}; do touch foo/node_modules/package2/file$i.js; done` + When I run `sh -c 'i=1; while [ $i -le 50 ]; do touch foo/node_modules/package2/file$i.js; i=$((i+1)); done'` Then STDERR should be empty - When I run `for i in {1..50}; do touch foo/node_modules/package3/file$i.js; done` + When I run `sh -c 'i=1; while [ $i -le 50 ]; do touch foo/node_modules/package3/file$i.js; i=$((i+1)); done'` Then STDERR should be empty When I run `wp dist-archive foo` diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php index 8d429de..d3d96b7 100644 --- a/src/Distignore_Filter_Iterator.php +++ 
b/src/Distignore_Filter_Iterator.php @@ -69,7 +69,16 @@ public function hasChildren() { } // For directories, check if they should be ignored. - $relative_filepath = str_replace( $this->source_dir_path, '', $item->getPathname() ); + $pathname = $item->getPathname(); + $source_path_length = strlen( $this->source_dir_path ); + + // Extract relative path by removing the source directory prefix. + if ( 0 === strpos( $pathname, $this->source_dir_path ) ) { + $relative_filepath = substr( $pathname, $source_path_length ); + } else { + // Fallback if path doesn't start with source path (shouldn't happen). + $relative_filepath = $pathname; + } try { $is_ignored = $this->checker->isPathIgnored( $relative_filepath ); @@ -87,6 +96,8 @@ public function hasChildren() { // This is a top-level ignored directory like "/node_modules" or "/.git". // It's likely safe to skip descent as these are typically simple patterns. // However, we still need to be conservative. Let's check if a child would be ignored. + // We use 'test' as a probe filename to check if children would be ignored. + // The actual name doesn't matter; we just need to verify the pattern applies to children. $test_child = $relative_filepath . '/test'; try { $child_ignored = $this->checker->isPathIgnored( $test_child ); From 240b67c08b16d80e5b97d2aa1bbee9c189ad9f30 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 19:46:58 +0000 Subject: [PATCH 06/10] Add caching to avoid duplicate isPathIgnored() calls Introduced an ignored_cache array in Distignore_Filter_Iterator to cache the results of isPathIgnored() checks. This eliminates duplicate checks: - Once in hasChildren() when deciding whether to descend into directories - Once in get_file_list() when categorizing files/directories The cache is shared between both methods via the new isPathIgnoredCached() method, reducing redundant gitignore pattern matching for directories. 
Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Dist_Archive_Command.php | 2 +- src/Distignore_Filter_Iterator.php | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Dist_Archive_Command.php b/src/Dist_Archive_Command.php index e60e490..e902d56 100644 --- a/src/Dist_Archive_Command.php +++ b/src/Dist_Archive_Command.php @@ -508,7 +508,7 @@ private function get_file_list( $source_dir_path, $excluded = false ) { foreach ( $iterator as $item ) { $relative_filepath = str_replace( $source_dir_path, '', $item->getPathname() ); try { - if ( $this->checker->isPathIgnored( $relative_filepath ) ) { + if ( $filter_iterator->isPathIgnoredCached( $relative_filepath ) ) { $excluded_files[] = $relative_filepath; } else { $included_files[] = $relative_filepath; diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php index d3d96b7..5f5b039 100644 --- a/src/Distignore_Filter_Iterator.php +++ b/src/Distignore_Filter_Iterator.php @@ -22,6 +22,13 @@ class Distignore_Filter_Iterator extends RecursiveFilterIterator { */ private $source_dir_path; + /** + * Cache for ignored status to avoid duplicate checks. + * + * @var array + */ + private $ignored_cache = []; + /** * Constructor. * @@ -47,6 +54,20 @@ public function accept() { return true; } + /** + * Check if a path is ignored, with caching to avoid duplicate checks. + * + * @param string $relative_filepath Relative file path to check. + * @return bool True if the path is ignored, false otherwise. + * @throws \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException + */ + public function isPathIgnoredCached( $relative_filepath ) { + if ( ! 
isset( $this->ignored_cache[ $relative_filepath ] ) ) { + $this->ignored_cache[ $relative_filepath ] = $this->checker->isPathIgnored( $relative_filepath ); + } + return $this->ignored_cache[ $relative_filepath ]; + } + /** * Check whether the current element has children that should be recursed into. * We return false for certain ignored directories to prevent descending into them. @@ -81,7 +102,7 @@ public function hasChildren() { } try { - $is_ignored = $this->checker->isPathIgnored( $relative_filepath ); + $is_ignored = $this->isPathIgnoredCached( $relative_filepath ); if ( ! $is_ignored ) { // Not ignored, so descend. @@ -100,7 +121,7 @@ public function hasChildren() { // The actual name doesn't matter; we just need to verify the pattern applies to children. $test_child = $relative_filepath . '/test'; try { - $child_ignored = $this->checker->isPathIgnored( $test_child ); + $child_ignored = $this->isPathIgnoredCached( $test_child ); if ( $child_ignored ) { // Child is also ignored, safe to skip descent. return false; From 245fa221ecc49255ec527918d8af1336387367c2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Dec 2025 20:22:53 +0000 Subject: [PATCH 07/10] Refactor filtering to happen in iterator's accept() method Moved the filtering logic from get_file_list() into the iterator's accept() method as suggested. This provides a more substantial optimization: - accept() now filters out ignored FILES immediately (not yielded) - Ignored DIRECTORIES are still yielded (for exclude tracking) but won't be descended into per hasChildren() - get_file_list() loop now only processes items that pass the filter, significantly reducing iterations - Excluded files are tracked in the iterator and available via getExcludedFiles() Performance: Iterator now yields only ~5 items instead of 200+ for a directory with 200 ignored files, while maintaining correct behavior for complex gitignore patterns with negations. 
Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Dist_Archive_Command.php | 44 +++++++++------- src/Distignore_Filter_Iterator.php | 85 ++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 23 deletions(-) diff --git a/src/Dist_Archive_Command.php b/src/Dist_Archive_Command.php index e902d56..624ca7b 100644 --- a/src/Dist_Archive_Command.php +++ b/src/Dist_Archive_Command.php @@ -493,7 +493,6 @@ protected function is_path_contains_symlink( $source_dir_path ) { private function get_file_list( $source_dir_path, $excluded = false ) { $included_files = []; - $excluded_files = []; $directory_iterator = new RecursiveDirectoryIterator( $source_dir_path, RecursiveDirectoryIterator::SKIP_DOTS ); $filter_iterator = new Distignore_Filter_Iterator( $directory_iterator, $this->checker, $source_dir_path ); @@ -507,34 +506,43 @@ private function get_file_list( $source_dir_path, $excluded = false ) { */ foreach ( $iterator as $item ) { $relative_filepath = str_replace( $source_dir_path, '', $item->getPathname() ); - try { - if ( $filter_iterator->isPathIgnoredCached( $relative_filepath ) ) { - $excluded_files[] = $relative_filepath; - } else { - $included_files[] = $relative_filepath; - } - } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { + + // Check if this item had an error during filtering. + $error = $filter_iterator->getErrorForItem( $relative_filepath ); + if ( $error ) { if ( $item->isLink() && ! file_exists( (string) readlink( $item->getPathname() ) ) ) { WP_CLI::error( "Broken symlink at {$relative_filepath}. Target missing at {$item->getLinkTarget()}." ); } else { - WP_CLI::error( $exception->getMessage() ); + WP_CLI::error( $error->getMessage() ); } } - } - // Check all excluded directories and remove them from the excluded list if they contain included files. - foreach ( $excluded_files as $excluded_file_index => $excluded_relative_path ) { - if ( ! is_dir( $source_dir_path . 
$excluded_relative_path ) ) { - continue; + // Check if this item is ignored (directories may still be yielded even if ignored). + if ( ! $filter_iterator->isPathIgnoredCached( $relative_filepath ) ) { + $included_files[] = $relative_filepath; } - foreach ( $included_files as $included_relative_path ) { - if ( 0 === strpos( $included_relative_path, $excluded_relative_path ) ) { - unset( $excluded_files[ $excluded_file_index ] ); + } + + if ( $excluded ) { + // Get excluded files from the filter iterator. + $excluded_files = $filter_iterator->getExcludedFiles(); + + // Check all excluded directories and remove them from the excluded list if they contain included files. + foreach ( $excluded_files as $excluded_file_index => $excluded_relative_path ) { + if ( ! is_dir( $source_dir_path . $excluded_relative_path ) ) { + continue; + } + foreach ( $included_files as $included_relative_path ) { + if ( 0 === strpos( $included_relative_path, $excluded_relative_path ) ) { + unset( $excluded_files[ $excluded_file_index ] ); + } } } + + return $excluded_files; } - return $excluded ? $excluded_files : $included_files; + return $included_files; } /** diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php index 5f5b039..513bbdd 100644 --- a/src/Distignore_Filter_Iterator.php +++ b/src/Distignore_Filter_Iterator.php @@ -29,6 +29,20 @@ class Distignore_Filter_Iterator extends RecursiveFilterIterator { */ private $ignored_cache = []; + /** + * List of excluded file paths (relative). + * + * @var string[] + */ + private $excluded_files = []; + + /** + * List of items that had errors during checking. + * + * @var array + */ + private $error_items = []; + /** * Constructor. * @@ -44,14 +58,51 @@ public function __construct( RecursiveIterator $iterator, GitIgnoreChecker $chec /** * Check whether the current element of the iterator is acceptable. - * We accept all elements so they can be checked in get_file_list(). 
+ * Filters out ignored files so they don't appear in the iteration. + * For directories, we're more conservative - we only filter them out + * if we're certain they and all their contents should be ignored. * - * @return bool Always true to accept all elements. + * @return bool True if the element should be included, false otherwise. */ #[\ReturnTypeWillChange] public function accept() { - // Accept all elements - filtering happens in get_file_list(). - return true; + /** @var SplFileInfo $item */ + $item = $this->current(); + + // Get relative path. + $pathname = $item->getPathname(); + $source_path_length = strlen( $this->source_dir_path ); + + if ( 0 === strpos( $pathname, $this->source_dir_path ) ) { + $relative_filepath = substr( $pathname, $source_path_length ); + } else { + $relative_filepath = $pathname; + } + + try { + $is_ignored = $this->isPathIgnoredCached( $relative_filepath ); + + if ( $is_ignored ) { + // Track this as excluded. + $this->excluded_files[] = $relative_filepath; + + // For files, we can safely filter them out. + if ( ! $item->isDir() ) { + return false; + } + + // For directories, only filter out if we're not going to descend + // (hasChildren will handle that check). + // We need to yield ignored directories so they can be tracked in exclude lists. + return true; + } + + return true; + } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) { + // Store the error and yield the item so get_file_list can handle it. + $this->error_items[ $relative_filepath ] = $exception; + return true; + } } /** @@ -149,6 +200,30 @@ public function hasChildren() { public function getChildren() { /** @var RecursiveDirectoryIterator $inner */ $inner = $this->getInnerIterator(); - return new self( $inner->getChildren(), $this->checker, $this->source_dir_path ); + // Pass the same arrays by reference so they accumulate across all levels. 
+ $child = new self( $inner->getChildren(), $this->checker, $this->source_dir_path ); + $child->excluded_files = &$this->excluded_files; + $child->ignored_cache = &$this->ignored_cache; + $child->error_items = &$this->error_items; + return $child; + } + + /** + * Get the list of excluded files that were filtered out. + * + * @return string[] + */ + public function getExcludedFiles() { + return $this->excluded_files; + } + + /** + * Check if an item had an error during processing. + * + * @param string $relative_filepath Relative file path to check. + * @return \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException|null + */ + public function getErrorForItem( $relative_filepath ) { + return $this->error_items[ $relative_filepath ] ?? null; } } From 9ce41bd5a9c201894293bb2103535df4fee0ea07 Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Wed, 24 Dec 2025 10:22:47 +0100 Subject: [PATCH 08/10] Add phpunit config --- phpunit.xml.dist | 26 +++++++++++++++++++++++ tests/Distignore_Filter_Iterator_Test.php | 6 ++++++ 2 files changed, 32 insertions(+) create mode 100644 phpunit.xml.dist create mode 100644 tests/Distignore_Filter_Iterator_Test.php diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..cb905da --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,26 @@ + + + + tests + + + + + + src + + + diff --git a/tests/Distignore_Filter_Iterator_Test.php b/tests/Distignore_Filter_Iterator_Test.php new file mode 100644 index 0000000..3c3aad9 --- /dev/null +++ b/tests/Distignore_Filter_Iterator_Test.php @@ -0,0 +1,6 @@ + Date: Wed, 24 Dec 2025 09:29:18 +0000 Subject: [PATCH 09/10] Add comprehensive PHPUnit tests for Distignore_Filter_Iterator Created 9 test methods covering all major functionality: - test_filters_ignored_files: Verifies ignored files are not yielded - test_tracks_ignored_directories: Ensures directories are tracked but files inside are filtered - test_get_excluded_files: Tests getExcludedFiles() method - 
test_caching_avoids_duplicate_checks: Verifies caching behavior - test_has_children_prevents_descent: Tests that hasChildren() prevents descent into ignored dirs - test_negation_patterns: Validates complex negation patterns work correctly - test_get_error_for_item_returns_null: Tests error handling - test_nested_directory_filtering: Ensures multi-level directories are handled - test_children_share_state: Verifies child iterators share cache and excluded files All 9 tests pass with 24 assertions. Tests include setup/teardown with temporary directories. Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- .phpunit.result.cache | 1 + tests/Distignore_Filter_Iterator_Test.php | 258 ++++++++++++++++++++++ 2 files changed, 259 insertions(+) create mode 100644 .phpunit.result.cache diff --git a/.phpunit.result.cache b/.phpunit.result.cache new file mode 100644 index 0000000..eaa4af4 --- /dev/null +++ b/.phpunit.result.cache @@ -0,0 +1 @@ +{"version":2,"defects":[],"times":{"Distignore_Filter_Iterator_Test::test_filters_ignored_files":0.002,"Distignore_Filter_Iterator_Test::test_tracks_ignored_directories":0.001,"Distignore_Filter_Iterator_Test::test_get_excluded_files":0.001,"Distignore_Filter_Iterator_Test::test_caching_avoids_duplicate_checks":0,"Distignore_Filter_Iterator_Test::test_has_children_prevents_descent":0.001,"Distignore_Filter_Iterator_Test::test_negation_patterns":0.001,"Distignore_Filter_Iterator_Test::test_get_error_for_item_returns_null":0,"Distignore_Filter_Iterator_Test::test_nested_directory_filtering":0.001,"Distignore_Filter_Iterator_Test::test_children_share_state":0.001}} \ No newline at end of file diff --git a/tests/Distignore_Filter_Iterator_Test.php b/tests/Distignore_Filter_Iterator_Test.php index 3c3aad9..f147a5c 100644 --- a/tests/Distignore_Filter_Iterator_Test.php +++ b/tests/Distignore_Filter_Iterator_Test.php @@ -1,6 +1,264 @@ temp_dir = sys_get_temp_dir() . '/distignore-test-' . 
uniqid(); + mkdir( $this->temp_dir ); + } + + /** + * Clean up test environment. + */ + public function tearDown(): void { + if ( is_dir( $this->temp_dir ) ) { + $this->recursiveDelete( $this->temp_dir ); + } + parent::tearDown(); + } + + /** + * Recursively delete a directory. + * + * @param string $dir Directory to delete. + */ + private function recursiveDelete( $dir ) { + if ( ! is_dir( $dir ) ) { + return; + } + $files = array_diff( scandir( $dir ), array( '.', '..' ) ); + foreach ( $files as $file ) { + $path = $dir . '/' . $file; + is_dir( $path ) ? $this->recursiveDelete( $path ) : unlink( $path ); + } + rmdir( $dir ); + } + + /** + * Test that the iterator filters out ignored files. + */ + public function test_filters_ignored_files() { + // Create test structure. + file_put_contents( $this->temp_dir . '/included.txt', 'test' ); + file_put_contents( $this->temp_dir . '/ignored.log', 'test' ); + file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + $files = []; + foreach ( $recursive_iter as $item ) { + $files[] = basename( $item->getPathname() ); + } + + $this->assertContains( 'included.txt', $files ); + $this->assertContains( '.distignore', $files ); + $this->assertNotContains( 'ignored.log', $files, 'Ignored file should not be yielded' ); + } + + /** + * Test that ignored directories are tracked but files inside are not yielded. + */ + public function test_tracks_ignored_directories() { + // Create test structure. + mkdir( $this->temp_dir . '/node_modules' ); + file_put_contents( $this->temp_dir . 
'/node_modules/package.json', '{}' ); + file_put_contents( $this->temp_dir . '/index.php', 'temp_dir . '/.distignore', "node_modules\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + $files = []; + foreach ( $recursive_iter as $item ) { + $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() ); + $files[] = $relative_path; + } + + $this->assertContains( '/index.php', $files ); + $this->assertContains( '/.distignore', $files ); + $this->assertContains( '/node_modules', $files, 'Ignored directory should be yielded for tracking' ); + $this->assertNotContains( '/node_modules/package.json', $files, 'Files inside ignored directory should not be yielded' ); + } + + /** + * Test that getExcludedFiles returns the correct list. + */ + public function test_get_excluded_files() { + mkdir( $this->temp_dir . '/ignored_dir' ); + file_put_contents( $this->temp_dir . '/ignored_dir/file.txt', 'test' ); + file_put_contents( $this->temp_dir . '/included.txt', 'test' ); + file_put_contents( $this->temp_dir . '/.distignore', "ignored_dir\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + // Iterate to populate excluded files. 
+ iterator_to_array( $recursive_iter ); + + $excluded = $filter_iter->getExcludedFiles(); + + $this->assertContains( '/ignored_dir', $excluded ); + $this->assertNotContains( '/included.txt', $excluded ); + } + + /** + * Test that repeated cached lookups of the same path agree. + * + * NOTE(review): only the two return values are compared; this does not prove the second call was served from the cache. + */ + public function test_caching_avoids_duplicate_checks() { + file_put_contents( $this->temp_dir . '/test.txt', 'test' ); + file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + + // First call should cache the result. + $result1 = $filter_iter->isPathIgnoredCached( '/test.txt' ); + // Second call should use cache. + $result2 = $filter_iter->isPathIgnoredCached( '/test.txt' ); + + $this->assertSame( $result1, $result2 ); + $this->assertFalse( $result1 ); // test.txt should not be ignored. + } + + /** + * Test that hasChildren prevents descent into ignored directories. + * + * The ignored directory itself is still expected in the output; only its contents are skipped. + */ + public function test_has_children_prevents_descent() { + mkdir( $this->temp_dir . '/node_modules' ); + file_put_contents( $this->temp_dir . '/node_modules/file1.js', 'test' ); + file_put_contents( $this->temp_dir . '/node_modules/file2.js', 'test' ); + file_put_contents( $this->temp_dir . 
'/.distignore', "node_modules\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + $files = []; + foreach ( $recursive_iter as $item ) { + $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() ); + $files[] = $relative_path; + } + + // The node_modules directory should be yielded but its files should not. + $this->assertContains( '/node_modules', $files ); + $this->assertNotContains( '/node_modules/file1.js', $files ); + $this->assertNotContains( '/node_modules/file2.js', $files ); + } + + /** + * Test handling of negation patterns. + * + * Entries directly under the frontend directory are ignored, except the build directory, which is re-included by a negated rule. + */ + public function test_negation_patterns() { + mkdir( $this->temp_dir . '/frontend' ); + mkdir( $this->temp_dir . '/frontend/build' ); + file_put_contents( $this->temp_dir . '/frontend/source.ts', 'test' ); + file_put_contents( $this->temp_dir . '/frontend/build/output.js', 'test' ); + file_put_contents( $this->temp_dir . 
'/.distignore', "frontend/*\n!/frontend/build/\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + $files = []; + foreach ( $recursive_iter as $item ) { + $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() ); + $files[] = $relative_path; + } + + $this->assertContains( '/frontend', $files ); + $this->assertContains( '/frontend/build', $files ); + $this->assertContains( '/frontend/build/output.js', $files, 'Negated path should be included' ); + $this->assertNotContains( '/frontend/source.ts', $files, 'Ignored file should not be included' ); + } + + /** + * Test getErrorForItem returns null when no error. + * + * The lookup is made on a freshly constructed iterator, without iterating first. + */ + public function test_get_error_for_item_returns_null() { + file_put_contents( $this->temp_dir . '/test.txt', 'test' ); + file_put_contents( $this->temp_dir . '/.distignore', '' ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + + $error = $filter_iter->getErrorForItem( '/test.txt' ); + + $this->assertNull( $error ); + } + + /** + * Test that multiple levels of directories are handled correctly. + * + * With an empty .distignore, every directory and file at each level is expected to be yielded. + */ + public function test_nested_directory_filtering() { + mkdir( $this->temp_dir . '/src' ); + mkdir( $this->temp_dir . '/src/components' ); + // File contents are irrelevant here; only the resulting paths are asserted. + file_put_contents( $this->temp_dir . '/src/index.php', 'test' ); + file_put_contents( $this->temp_dir . '/src/components/widget.php', 'test' ); + file_put_contents( $this->temp_dir . 
'/.distignore', '' ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + $files = []; + foreach ( $recursive_iter as $item ) { + $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() ); + $files[] = $relative_path; + } + + $this->assertContains( '/src', $files ); + $this->assertContains( '/src/components', $files ); + $this->assertContains( '/src/index.php', $files ); + $this->assertContains( '/src/components/widget.php', $files ); + } + + /** + * Test that children share the same cache and excluded files arrays. + * + * The excluded list is read from the root filter iterator after a full traversal. + */ + public function test_children_share_state() { + mkdir( $this->temp_dir . '/level1' ); + mkdir( $this->temp_dir . '/level1/level2' ); + file_put_contents( $this->temp_dir . '/level1/file1.txt', 'test' ); + file_put_contents( $this->temp_dir . '/level1/level2/file2.log', 'test' ); + file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" ); + + $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' ); + $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS ); + $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir ); + $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST ); + + // Iterate to populate excluded files. + iterator_to_array( $recursive_iter ); + + $excluded = $filter_iter->getExcludedFiles(); + + // The .log file in level2 should be tracked even though it was found by a child iterator. 
+ $this->assertContains( '/level1/level2/file2.log', $excluded ); + } } From 2f50390956d03e594fc538c034ae82274f7e06b1 Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Wed, 24 Dec 2025 10:47:30 +0100 Subject: [PATCH 10/10] Update gitignore config --- .gitignore | 2 ++ .phpunit.result.cache | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 .phpunit.result.cache diff --git a/.gitignore b/.gitignore index 0346fa9..8d6c851 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ vendor/ phpunit.xml phpcs.xml .phpcs.xml +.phpunit.result.cache +.phpunit.cache diff --git a/.phpunit.result.cache b/.phpunit.result.cache deleted file mode 100644 index eaa4af4..0000000 --- a/.phpunit.result.cache +++ /dev/null @@ -1 +0,0 @@ -{"version":2,"defects":[],"times":{"Distignore_Filter_Iterator_Test::test_filters_ignored_files":0.002,"Distignore_Filter_Iterator_Test::test_tracks_ignored_directories":0.001,"Distignore_Filter_Iterator_Test::test_get_excluded_files":0.001,"Distignore_Filter_Iterator_Test::test_caching_avoids_duplicate_checks":0,"Distignore_Filter_Iterator_Test::test_has_children_prevents_descent":0.001,"Distignore_Filter_Iterator_Test::test_negation_patterns":0.001,"Distignore_Filter_Iterator_Test::test_get_error_for_item_returns_null":0,"Distignore_Filter_Iterator_Test::test_nested_directory_filtering":0.001,"Distignore_Filter_Iterator_Test::test_children_share_state":0.001}} \ No newline at end of file