diff --git a/.gitignore b/.gitignore
index 0346fa9..8d6c851 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ vendor/
phpunit.xml
phpcs.xml
.phpcs.xml
+.phpunit.result.cache
+.phpunit.cache
diff --git a/features/distignore.feature b/features/distignore.feature
index e7929ca..d9feaa3 100644
--- a/features/distignore.feature
+++ b/features/distignore.feature
@@ -379,3 +379,50 @@ Feature: Generate a distribution archive of a project with .distignore
"""
Error: Broken symlink at /symlink. Target missing at
"""
+
+ Scenario: Efficiently ignores directories with many files
+ # Performance test: ensure ignored directories are not scanned
+ # @see https://github.com/wp-cli/dist-archive-command/issues/XXX
+ Given an empty directory
+ And a foo/.distignore file:
+ """
+ node_modules
+ .git
+ """
+ And a foo/plugin.php file:
+ """
+
+
+
+ tests
+
+
+
+
+
+ src
+
+
+
diff --git a/src/Dist_Archive_Command.php b/src/Dist_Archive_Command.php
index 01ca0bb..624ca7b 100644
--- a/src/Dist_Archive_Command.php
+++ b/src/Dist_Archive_Command.php
@@ -493,10 +493,11 @@ protected function is_path_contains_symlink( $source_dir_path ) {
private function get_file_list( $source_dir_path, $excluded = false ) {
$included_files = [];
- $excluded_files = [];
- $iterator = new RecursiveIteratorIterator(
- new RecursiveDirectoryIterator( $source_dir_path, RecursiveDirectoryIterator::SKIP_DOTS ),
+ $directory_iterator = new RecursiveDirectoryIterator( $source_dir_path, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iterator = new Distignore_Filter_Iterator( $directory_iterator, $this->checker, $source_dir_path );
+ $iterator = new RecursiveIteratorIterator(
+ $filter_iterator,
RecursiveIteratorIterator::SELF_FIRST
);
@@ -505,34 +506,43 @@ private function get_file_list( $source_dir_path, $excluded = false ) {
*/
foreach ( $iterator as $item ) {
$relative_filepath = str_replace( $source_dir_path, '', $item->getPathname() );
- try {
- if ( $this->checker->isPathIgnored( $relative_filepath ) ) {
- $excluded_files[] = $relative_filepath;
- } else {
- $included_files[] = $relative_filepath;
- }
- } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) {
+
+ // Check if this item had an error during filtering.
+ $error = $filter_iterator->getErrorForItem( $relative_filepath );
+ if ( $error ) {
if ( $item->isLink() && ! file_exists( (string) readlink( $item->getPathname() ) ) ) {
WP_CLI::error( "Broken symlink at {$relative_filepath}. Target missing at {$item->getLinkTarget()}." );
} else {
- WP_CLI::error( $exception->getMessage() );
+ WP_CLI::error( $error->getMessage() );
}
}
- }
- // Check all excluded directories and remove them from the excluded list if they contain included files.
- foreach ( $excluded_files as $excluded_file_index => $excluded_relative_path ) {
- if ( ! is_dir( $source_dir_path . $excluded_relative_path ) ) {
- continue;
+ // Check if this item is ignored (directories may still be yielded even if ignored).
+ if ( ! $filter_iterator->isPathIgnoredCached( $relative_filepath ) ) {
+ $included_files[] = $relative_filepath;
}
- foreach ( $included_files as $included_relative_path ) {
- if ( 0 === strpos( $included_relative_path, $excluded_relative_path ) ) {
- unset( $excluded_files[ $excluded_file_index ] );
+ }
+
+ if ( $excluded ) {
+ // Get excluded files from the filter iterator.
+ $excluded_files = $filter_iterator->getExcludedFiles();
+
+ // Check all excluded directories and remove them from the excluded list if they contain included files.
+ foreach ( $excluded_files as $excluded_file_index => $excluded_relative_path ) {
+ if ( ! is_dir( $source_dir_path . $excluded_relative_path ) ) {
+ continue;
+ }
+ foreach ( $included_files as $included_relative_path ) {
+ if ( 0 === strpos( $included_relative_path, $excluded_relative_path ) ) {
+ unset( $excluded_files[ $excluded_file_index ] );
+ }
}
}
+
+ return $excluded_files;
}
- return $excluded ? $excluded_files : $included_files;
+ return $included_files;
}
/**
diff --git a/src/Distignore_Filter_Iterator.php b/src/Distignore_Filter_Iterator.php
new file mode 100644
index 0000000..513bbdd
--- /dev/null
+++ b/src/Distignore_Filter_Iterator.php
@@ -0,0 +1,229 @@
+
+ */
+ private $ignored_cache = [];
+
+ /**
+ * List of excluded file paths (relative).
+ *
+ * @var string[]
+ */
+ private $excluded_files = [];
+
+ /**
+ * List of items that had errors during checking.
+ *
+ * @var array
+ */
+ private $error_items = [];
+
+ /**
+ * Constructor.
+ *
+ * @param RecursiveIterator $iterator Iterator to filter.
+ * @param GitIgnoreChecker $checker GitIgnore checker instance.
+ * @param string $source_dir_path Base directory path.
+ */
+ public function __construct( RecursiveIterator $iterator, GitIgnoreChecker $checker, $source_dir_path ) {
+ parent::__construct( $iterator );
+ $this->checker = $checker;
+ $this->source_dir_path = $source_dir_path;
+ }
+
+ /**
+ * Check whether the current element of the iterator is acceptable.
+ * Filters out ignored files so they don't appear in the iteration.
+ * For directories, we're more conservative - we only filter them out
+ * if we're certain they and all their contents should be ignored.
+ *
+ * @return bool True if the element should be included, false otherwise.
+ */
+ #[\ReturnTypeWillChange]
+ public function accept() {
+ /** @var SplFileInfo $item */
+ $item = $this->current();
+
+ // Get relative path.
+ $pathname = $item->getPathname();
+ $source_path_length = strlen( $this->source_dir_path );
+
+ if ( 0 === strpos( $pathname, $this->source_dir_path ) ) {
+ $relative_filepath = substr( $pathname, $source_path_length );
+ } else {
+ $relative_filepath = $pathname;
+ }
+
+ try {
+ $is_ignored = $this->isPathIgnoredCached( $relative_filepath );
+
+ if ( $is_ignored ) {
+ // Track this as excluded.
+ $this->excluded_files[] = $relative_filepath;
+
+ // For files, we can safely filter them out.
+ if ( ! $item->isDir() ) {
+ return false;
+ }
+
+ // For directories, only filter out if we're not going to descend
+ // (hasChildren will handle that check).
+ // We need to yield ignored directories so they can be tracked in exclude lists.
+ return true;
+ }
+
+ return true;
+ } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) {
+ // Store the error and yield the item so get_file_list can handle it.
+ $this->error_items[ $relative_filepath ] = $exception;
+ return true;
+ }
+ }
+
+ /**
+ * Check if a path is ignored, with caching to avoid duplicate checks.
+ *
+ * @param string $relative_filepath Relative file path to check.
+ * @return bool True if the path is ignored, false otherwise.
+ * @throws \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException
+ */
+ public function isPathIgnoredCached( $relative_filepath ) {
+ if ( ! isset( $this->ignored_cache[ $relative_filepath ] ) ) {
+ $this->ignored_cache[ $relative_filepath ] = $this->checker->isPathIgnored( $relative_filepath );
+ }
+ return $this->ignored_cache[ $relative_filepath ];
+ }
+
+ /**
+ * Check whether the current element has children that should be recursed into.
+ * We return false for certain ignored directories to prevent descending into them.
+ *
+ * This optimization only applies to directories that appear to be "leaf" ignore
+ * patterns (simple directory names without wildcards), to safely handle cases
+ * like `node_modules` while still correctly processing complex patterns with
+ * negations like `frontend/*` with `!/frontend/build/`.
+ *
+ * @return bool True if we should descend into this directory, false otherwise.
+ */
+ #[\ReturnTypeWillChange]
+ public function hasChildren() {
+ /** @var SplFileInfo $item */
+ $item = $this->current();
+
+ // If it's not a directory, it has no children.
+ if ( ! $item->isDir() ) {
+ return false;
+ }
+
+ // For directories, check if they should be ignored.
+ $pathname = $item->getPathname();
+ $source_path_length = strlen( $this->source_dir_path );
+
+ // Extract relative path by removing the source directory prefix.
+ if ( 0 === strpos( $pathname, $this->source_dir_path ) ) {
+ $relative_filepath = substr( $pathname, $source_path_length );
+ } else {
+ // Fallback if path doesn't start with source path (shouldn't happen).
+ $relative_filepath = $pathname;
+ }
+
+ try {
+ $is_ignored = $this->isPathIgnoredCached( $relative_filepath );
+
+ if ( ! $is_ignored ) {
+ // Not ignored, so descend.
+ return true;
+ }
+
+ // Directory is ignored. Check if it's safe to skip descent.
+ // We only skip for single-level directories (no slashes except leading/trailing)
+ // to avoid issues with wildcard patterns and negations.
+ $path_parts = explode( '/', trim( $relative_filepath, '/' ) );
+ if ( count( $path_parts ) === 1 ) {
+ // This is a top-level ignored directory like "/node_modules" or "/.git".
+ // It's likely safe to skip descent as these are typically simple patterns.
+ // However, we still need to be conservative. Let's check if a child would be ignored.
+ // We use 'test' as a probe filename to check if children would be ignored.
+ // The actual name doesn't matter; we just need to verify the pattern applies to children.
+ $test_child = $relative_filepath . '/test';
+ try {
+ $child_ignored = $this->isPathIgnoredCached( $test_child );
+ if ( $child_ignored ) {
+ // Child is also ignored, safe to skip descent.
+ return false;
+ }
+ } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) {
+ // On error, descend to be safe.
+ return true;
+ }
+ }
+
+ // For nested directories or if test shows children might not be ignored, descend.
+ return true;
+ } catch ( \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException $exception ) {
+ // If there's an error checking, allow descending (error will be handled in get_file_list).
+ return true;
+ }
+ }
+
+ /**
+ * Return the inner iterator's children wrapped in this filter.
+ *
+ * @return RecursiveFilterIterator
+ */
+ #[\ReturnTypeWillChange]
+ public function getChildren() {
+ /** @var RecursiveDirectoryIterator $inner */
+ $inner = $this->getInnerIterator();
+ // Pass the same arrays by reference so they accumulate across all levels.
+ $child = new self( $inner->getChildren(), $this->checker, $this->source_dir_path );
+ $child->excluded_files = &$this->excluded_files;
+ $child->ignored_cache = &$this->ignored_cache;
+ $child->error_items = &$this->error_items;
+ return $child;
+ }
+
+ /**
+ * Get the list of excluded files that were filtered out.
+ *
+ * @return string[]
+ */
+ public function getExcludedFiles() {
+ return $this->excluded_files;
+ }
+
+ /**
+ * Check if an item had an error during processing.
+ *
+ * @param string $relative_filepath Relative file path to check.
+ * @return \Inmarelibero\GitIgnoreChecker\Exception\InvalidArgumentException|null
+ */
+ public function getErrorForItem( $relative_filepath ) {
+ return $this->error_items[ $relative_filepath ] ?? null;
+ }
+}
diff --git a/tests/Distignore_Filter_Iterator_Test.php b/tests/Distignore_Filter_Iterator_Test.php
new file mode 100644
index 0000000..f147a5c
--- /dev/null
+++ b/tests/Distignore_Filter_Iterator_Test.php
@@ -0,0 +1,264 @@
+temp_dir = sys_get_temp_dir() . '/distignore-test-' . uniqid();
+ mkdir( $this->temp_dir );
+ }
+
+ /**
+ * Clean up test environment.
+ */
+ public function tearDown(): void {
+ if ( is_dir( $this->temp_dir ) ) {
+ $this->recursiveDelete( $this->temp_dir );
+ }
+ parent::tearDown();
+ }
+
+ /**
+ * Recursively delete a directory.
+ *
+ * @param string $dir Directory to delete.
+ */
+ private function recursiveDelete( $dir ) {
+ if ( ! is_dir( $dir ) ) {
+ return;
+ }
+ $files = array_diff( scandir( $dir ), array( '.', '..' ) );
+ foreach ( $files as $file ) {
+ $path = $dir . '/' . $file;
+ is_dir( $path ) ? $this->recursiveDelete( $path ) : unlink( $path );
+ }
+ rmdir( $dir );
+ }
+
+ /**
+ * Test that the iterator filters out ignored files.
+ */
+ public function test_filters_ignored_files() {
+ // Create test structure.
+ file_put_contents( $this->temp_dir . '/included.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/ignored.log', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ $files = [];
+ foreach ( $recursive_iter as $item ) {
+ $files[] = basename( $item->getPathname() );
+ }
+
+ $this->assertContains( 'included.txt', $files );
+ $this->assertContains( '.distignore', $files );
+ $this->assertNotContains( 'ignored.log', $files, 'Ignored file should not be yielded' );
+ }
+
+ /**
+ * Test that ignored directories are tracked but files inside are not yielded.
+ */
+ public function test_tracks_ignored_directories() {
+ // Create test structure.
+ mkdir( $this->temp_dir . '/node_modules' );
+ file_put_contents( $this->temp_dir . '/node_modules/package.json', '{}' );
+ file_put_contents( $this->temp_dir . '/index.php', 'temp_dir . '/.distignore', "node_modules\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ $files = [];
+ foreach ( $recursive_iter as $item ) {
+ $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() );
+ $files[] = $relative_path;
+ }
+
+ $this->assertContains( '/index.php', $files );
+ $this->assertContains( '/.distignore', $files );
+ $this->assertContains( '/node_modules', $files, 'Ignored directory should be yielded for tracking' );
+ $this->assertNotContains( '/node_modules/package.json', $files, 'Files inside ignored directory should not be yielded' );
+ }
+
+ /**
+ * Test that getExcludedFiles returns the correct list.
+ */
+ public function test_get_excluded_files() {
+ mkdir( $this->temp_dir . '/ignored_dir' );
+ file_put_contents( $this->temp_dir . '/ignored_dir/file.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/included.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "ignored_dir\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ // Iterate to populate excluded files.
+ iterator_to_array( $recursive_iter );
+
+ $excluded = $filter_iter->getExcludedFiles();
+
+ $this->assertContains( '/ignored_dir', $excluded );
+ $this->assertNotContains( '/included.txt', $excluded );
+ }
+
+ /**
+ * Test caching behavior to avoid duplicate checks.
+ */
+ public function test_caching_avoids_duplicate_checks() {
+ file_put_contents( $this->temp_dir . '/test.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+
+ // First call should cache the result.
+ $result1 = $filter_iter->isPathIgnoredCached( '/test.txt' );
+ // Second call should use cache.
+ $result2 = $filter_iter->isPathIgnoredCached( '/test.txt' );
+
+ $this->assertSame( $result1, $result2 );
+ $this->assertFalse( $result1 ); // test.txt should not be ignored.
+ }
+
+ /**
+ * Test that hasChildren prevents descent into ignored directories.
+ */
+ public function test_has_children_prevents_descent() {
+ mkdir( $this->temp_dir . '/node_modules' );
+ file_put_contents( $this->temp_dir . '/node_modules/file1.js', 'test' );
+ file_put_contents( $this->temp_dir . '/node_modules/file2.js', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "node_modules\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ $files = [];
+ foreach ( $recursive_iter as $item ) {
+ $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() );
+ $files[] = $relative_path;
+ }
+
+ // The node_modules directory should be yielded but its files should not.
+ $this->assertContains( '/node_modules', $files );
+ $this->assertNotContains( '/node_modules/file1.js', $files );
+ $this->assertNotContains( '/node_modules/file2.js', $files );
+ }
+
+ /**
+ * Test handling of negation patterns.
+ */
+ public function test_negation_patterns() {
+ mkdir( $this->temp_dir . '/frontend' );
+ mkdir( $this->temp_dir . '/frontend/build' );
+ file_put_contents( $this->temp_dir . '/frontend/source.ts', 'test' );
+ file_put_contents( $this->temp_dir . '/frontend/build/output.js', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "frontend/*\n!/frontend/build/\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ $files = [];
+ foreach ( $recursive_iter as $item ) {
+ $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() );
+ $files[] = $relative_path;
+ }
+
+ $this->assertContains( '/frontend', $files );
+ $this->assertContains( '/frontend/build', $files );
+ $this->assertContains( '/frontend/build/output.js', $files, 'Negated path should be included' );
+ $this->assertNotContains( '/frontend/source.ts', $files, 'Ignored file should not be included' );
+ }
+
+ /**
+ * Test getErrorForItem returns null when no error.
+ */
+ public function test_get_error_for_item_returns_null() {
+ file_put_contents( $this->temp_dir . '/test.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', '' );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+
+ $error = $filter_iter->getErrorForItem( '/test.txt' );
+
+ $this->assertNull( $error );
+ }
+
+ /**
+ * Test that multiple levels of directories are handled correctly.
+ */
+ public function test_nested_directory_filtering() {
+ mkdir( $this->temp_dir . '/src' );
+ mkdir( $this->temp_dir . '/src/components' );
+ file_put_contents( $this->temp_dir . '/src/index.php', 'temp_dir . '/src/components/widget.php', 'temp_dir . '/.distignore', '' );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ $files = [];
+ foreach ( $recursive_iter as $item ) {
+ $relative_path = str_replace( $this->temp_dir, '', $item->getPathname() );
+ $files[] = $relative_path;
+ }
+
+ $this->assertContains( '/src', $files );
+ $this->assertContains( '/src/components', $files );
+ $this->assertContains( '/src/index.php', $files );
+ $this->assertContains( '/src/components/widget.php', $files );
+ }
+
+ /**
+ * Test that children share the same cache and excluded files arrays.
+ */
+ public function test_children_share_state() {
+ mkdir( $this->temp_dir . '/level1' );
+ mkdir( $this->temp_dir . '/level1/level2' );
+ file_put_contents( $this->temp_dir . '/level1/file1.txt', 'test' );
+ file_put_contents( $this->temp_dir . '/level1/level2/file2.log', 'test' );
+ file_put_contents( $this->temp_dir . '/.distignore', "*.log\n" );
+
+ $checker = new GitIgnoreChecker( $this->temp_dir, '.distignore' );
+ $directory_iter = new RecursiveDirectoryIterator( $this->temp_dir, RecursiveDirectoryIterator::SKIP_DOTS );
+ $filter_iter = new Distignore_Filter_Iterator( $directory_iter, $checker, $this->temp_dir );
+ $recursive_iter = new RecursiveIteratorIterator( $filter_iter, RecursiveIteratorIterator::SELF_FIRST );
+
+ // Iterate to populate excluded files.
+ iterator_to_array( $recursive_iter );
+
+ $excluded = $filter_iter->getExcludedFiles();
+
+ // The .log file in level2 should be tracked even though it was found by a child iterator.
+ $this->assertContains( '/level1/level2/file2.log', $excluded );
+ }
+}