diff --git a/.github/workflows/yetus-general-check.yml b/.github/workflows/yetus-general-check.yml index bb285a7b0692..ecaf94c19424 100644 --- a/.github/workflows/yetus-general-check.yml +++ b/.github/workflows/yetus-general-check.yml @@ -97,7 +97,7 @@ jobs: if: always() run: | cd "${{ github.workspace }}" - python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output/console.txt >> $GITHUB_STEP_SUMMARY + python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output >> $GITHUB_STEP_SUMMARY - name: Publish Test Results if: always() diff --git a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml index 8526943c3339..8d41b86b99e4 100644 --- a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml +++ b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml @@ -95,7 +95,7 @@ jobs: if: always() run: | cd "${{ github.workspace }}" - python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output/console.txt >> $GITHUB_STEP_SUMMARY + python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output >> $GITHUB_STEP_SUMMARY - name: Publish Results if: always() diff --git a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml index b4cc992b9a08..f29acabb5290 100644 --- a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml +++ b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml @@ -97,7 +97,6 @@ jobs: PLUGINS: "github,htmlout,maven,unit" SET_JAVA_HOME: "/usr/lib/jvm/java-17" SOURCEDIR: "${{ github.workspace }}/src" - TESTS_FILTER: "mvninstall" YETUSDIR: "${{ github.workspace }}/yetus" AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc" BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt" @@ -118,7 +117,7 @@ jobs: if: always() run: | cd "${{ github.workspace }}" - python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output/console.txt >> $GITHUB_STEP_SUMMARY + python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output >> $GITHUB_STEP_SUMMARY - name: Publish Test Results if: always() diff --git a/dev-support/jenkins_precommit_github_yetus.sh b/dev-support/jenkins_precommit_github_yetus.sh index 59d4cf2b82c3..4ec0c1d3829e 100755 --- a/dev-support/jenkins_precommit_github_yetus.sh +++ b/dev-support/jenkins_precommit_github_yetus.sh @@ -38,7 +38,6 @@ declare -a required_envs=( "PLUGINS" "SET_JAVA_HOME" "SOURCEDIR" - "TESTS_FILTER" "YETUSDIR" "AUTHOR_IGNORE_LIST" "BLANKS_EOL_IGNORE_FILE" @@ -126,7 +125,9 @@ YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}") YETUS_ARGS+=("--author-ignore-list=${AUTHOR_IGNORE_LIST}") YETUS_ARGS+=("--blanks-eol-ignore-file=${BLANKS_EOL_IGNORE_FILE}") YETUS_ARGS+=("--blanks-tabs-ignore-file=${BLANKS_TABS_IGNORE_FILE}*") -YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}") +if [[ -n "${TESTS_FILTER}" ]]; then + YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}") +fi YETUS_ARGS+=("--personality=${SOURCEDIR}/dev-support/hbase-personality.sh") YETUS_ARGS+=("--quick-hadoopcheck") if [[ "${SKIP_ERRORPRONE}" = "true" ]]; then diff --git a/dev-support/yetus_console_to_md.py b/dev-support/yetus_console_to_md.py index e03b5e17ba1a..bee5512eec71 100644 --- a/dev-support/yetus_console_to_md.py +++ b/dev-support/yetus_console_to_md.py @@ -19,11 +19,12 @@ """ Convert Apache Yetus console output to Markdown format. """ +import os import re import sys +from io import TextIOWrapper from pathlib import Path -from typing import List, Optional, Tuple - +from typing import Dict, List, Optional, Tuple # Vote to emoji mapping VOTE_EMOJI = { @@ -45,24 +46,304 @@ def is_runtime(text: str) -> bool: return bool(re.match(r'^\d+m\s+\d+s$', text)) -def parse_table_row(line: str) -> List[str]: +def parse_table_row(line: str) -> Tuple[str, str, str, str]: """ - Parse a table row and return list of cell values. - Returns exactly 4 columns: [vote, subsystem, runtime, comment] + Parse a table row and return tuple of cell values. + Returns exactly 4 columns: (vote, subsystem, runtime, comment) """ parts = line.split('|') # Remove first empty element (from leading |) parts = parts[1:] if len(parts) > 1 else [] - result = [] - for p in parts[:4]: # Take first 4 columns - result.append(p.strip()) + # Take first 4 columns and strip whitespace + result: List[str] = [p.strip() for p in parts[:4]] # Pad to 4 columns if needed while len(result) < 4: result.append('') - return result + return result[0], result[1], result[2], result[3] + + +def is_results_section_start(line: str) -> bool: + """Check if line indicates the start of Results section.""" + return bool(re.search(r'^\[\w+] Results:', line.strip())) + + +def is_tests_run_summary(line: str) -> bool: + """Check if line is the Tests run summary line.""" + return bool(re.search(r'^\[\w+] Tests run:', line.strip())) + + +def parse_results_section( + f: TextIOWrapper, + failures: List[str], + flakes: List[str], + errors: List[str] +) -> None: + """ + Parse the Results section within a patch-unit file. + """ + current_error_type = None + while line := f.readline(): + stripped = line.strip() + + # Section end markers + if is_tests_run_summary(line): + return + + # Detect error type sections + if re.search(r'^\[\w+] Failures:', stripped): + current_error_type = failures + elif re.search(r'^\[\w+] Flakes:', stripped): + current_error_type = flakes + elif re.search(r'^\[\w+] Errors:', stripped): + current_error_type = errors + else: + # Parse test entries + if current_error_type is not None: + test_match = re.search( + r'^\[\w+]\s+((?:org\.)?\S+\.(?:\w+\.)*\w+\.\w+)', + stripped + ) + if test_match: + test_name = test_match.group(1) + if 'test' in test_name.lower(): + current_error_type.append(test_name) + + +def skip_to_results_section(f: TextIOWrapper) -> bool: + """ + Skip the io stream to the Results section. + After calling this method, the TextIOWrapper will locate at the next line of "Results: " + + Returns: + True if we find a results section, False if we have reached the EOF + """ + while line := f.readline(): + if is_results_section_start(line): + return True + return False + + +def scan_all_tests(dir: Path) -> Dict[str, str]: + """ + Scan the archiver dir to find all the tests and their module + + Returns: + Dict mapping test name to module name + """ + module = None + module_to_test_name = {} + for dirpath, _, filenames in os.walk(dir): + if len(filenames) > 0: + # /archiver//target/surefire-reports + module = dirpath.split(os.sep)[-3] + for filename in filenames: + match = re.match(r'(org\.apache\.[^-]+)\.txt', filename) + if match: + module_to_test_name[match.group(1)] = module + return module_to_test_name + + +def parse_patch_unit_file( + file_path: Path, + failures: List[str], + flakes: List[str], + errors: List[str] +) -> None: + """ + Parse a patch-unit-*.txt file and extract failed tests by module. + """ + with open(file_path, 'r') as f: + while skip_to_results_section(f): + parse_results_section(f, failures, flakes, errors) + + +def get_module(test_name: str, test_name_to_module: Dict[str, str]) -> str: + rindex_of_bracket = test_name.rfind('[') + if rindex_of_bracket > 0: + # parameterized test, remove the tailing parameters + test_name = test_name[:rindex_of_bracket] + + module = test_name_to_module.get(test_name) + if module: + return module + + # usually the failed test name has the method name suffix, but the test_name_to_module only + # contains class name, so let's try to remove the last part and try again + rindex_of_dot = test_name.rfind('.') + if rindex_of_dot > 0: + test_name = test_name[:rindex_of_dot] + + module = test_name_to_module.get(test_name) + if module: + return module + return 'default' + + +def increase(module_to_count: Dict[str, int], module: str) -> None: + if module in module_to_count: + module_to_count[module] += 1 + else: + module_to_count[module] = 1 + + +def add_to_details(test_name: str, module: str, error_type: str, + details: Dict[str, Dict[str, List[str]]]) -> None: + if module not in details: + error_type_to_tests = {} + details[module] = error_type_to_tests + else: + error_type_to_tests = details[module] + + if error_type in error_type_to_tests: + error_type_to_tests[error_type].append(test_name) + else: + error_type_to_tests[error_type] = [test_name] + + +def process_failed_tests( + error_type: str, + failed_tests: List[str], + module_to_test_name: Dict[str, str], + counts: Dict[str, Dict[str, int]], + details: Dict[str, Dict[str, List[str]]] +) -> None: + for test_name in failed_tests: + module = get_module(test_name, module_to_test_name) + increase(counts[error_type], module) + add_to_details(test_name, module, error_type, details) + + +def aggregate_failed_tests(yetus_dir: Path) -> Tuple[ + Dict[str, Dict[str, int]], Dict[str, Dict[str, List[str]]]]: + """ + Aggregate failed tests from all patch-unit-*.txt files. + + Returns: + Tuple of: + - counts: {error_type: {module: count}} + - details: {module: {error_type: [test_names]}} + """ + patch_files = list(yetus_dir.glob('patch-unit-*.txt')) + + if not patch_files: + return {}, {} + + # Aggregate results from all files + failures = [] + flakes = [] + errors = [] + + for patch_file in patch_files: + parse_patch_unit_file(patch_file, failures, flakes, errors) + + if not failures and not flakes and not errors: + return {}, {} + + counts = {'Failures': {}, 'Flakes': {}, 'Errors': {}} + details = {} + module_to_test_name = scan_all_tests(yetus_dir / 'archiver') + process_failed_tests('Failures', failures, module_to_test_name, counts, details) + process_failed_tests('Flakes', flakes, module_to_test_name, counts, details) + process_failed_tests('Errors', errors, module_to_test_name, counts, details) + + return dict(counts), dict(details) + + +def generate_failed_tests_table( + counts: Dict[str, Dict[str, int]], + details: Dict[str, Dict[str, List[str]]] +) -> List[str]: + """Generate the Failed Tests HTML table.""" + total_failures = sum(sum(m.values()) for m in counts.values()) + if total_failures == 0: + return [] + + content = [ + '\n## Failed Tests\n\n', + '\n', + '\n', + '\n' + ] + + error_types = ['Failures', 'Flakes', 'Errors'] + + for error_type in error_types: + if error_type not in counts: + continue + + modules = counts[error_type] + total_count = sum(modules.values()) + num_modules = len(modules) + + first_row = True + for module in sorted(modules.keys()): + tests = details.get(module, {}).get(error_type, []) + tests_str = '
'.join(sorted(set(tests))) if tests else '' + + if first_row: + content.append( + f'' + f'' + f'\n' + ) + first_row = False + else: + content.append(f'\n') + + content.extend(['\n', '
Error TypeCountModuleTests
{error_type}{total_count}{module}{tests_str}
{module}{tests_str}
\n']) + + return content + + +def collect_continuation_lines( + lines: List[str], + start_idx: int +) -> Tuple[List[str], int]: + """ + Collect continuation lines for a table row. + + Args: + lines: All lines from the file + start_idx: Index to start checking from + + Returns: + Tuple of (list of comment parts, next index to process) + """ + comment_parts = [] + i = start_idx + + while i < len(lines): + line = lines[i] + stripped = line.strip() + + if not stripped.startswith('|'): + break + + if '|| Subsystem || Report/Notes ||' in line: + break + + vote, _, runtime, comment = parse_table_row(line) + + # Stop at new data row + if vote in VOTE_EMOJI: + break + + # Empty vote/subsystem means continuation or separator + if not vote: + if comment: + comment_parts.append(comment) + i += 1 + elif runtime and is_runtime(runtime): + break + else: + i += 1 + else: + break + + return comment_parts, i def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]: @@ -70,15 +351,15 @@ def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], in Process the first table (Vote, Subsystem, Runtime, Comment). Returns: - Tuple of (markdown lines, next index to process) + Tuple of (Markdown lines, next index to process) """ - content = [] - i = start_idx + content = [ + '\n', + '| Vote | Subsystem | Runtime | Comment |\n', + '|------|-----------|---------|---------|\n' + ] - # Add table header - content.append('\n') - content.append('| Vote | Subsystem | Runtime | Comment |\n') - content.append('|------|-----------|---------|---------|\n') + i = start_idx # Skip the original separator line if i < len(lines) and '===' in lines[i]: @@ -88,184 +369,44 @@ def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], in line = lines[i] stripped = line.strip() - # Check for second table start if '|| Subsystem || Report/Notes ||' in line: break - # Skip section separator lines (like +-----------) if stripped.startswith('+--'): i += 1 continue - # Process table rows - if stripped.startswith('|'): - parts = parse_table_row(line) - vote, subsystem, runtime, comment = parts[0], parts[1], parts[2], parts[3] - - # Case 1: Section header (vote and subsystem are empty, has comment) - if not vote and not subsystem: - if comment: - content.append(f'| | | | {comment} |\n') - i += 1 - continue - # If there's only runtime, it's a total time row - elif runtime and is_runtime(runtime): - content.append(f'| | | {runtime} | |\n') - i += 1 - continue - else: - # Empty row, skip - i += 1 - continue - - # Case 2: Data row with vote - if vote in VOTE_EMOJI: - vote_emoji = convert_vote(vote) - comment_parts = [comment] if comment else [] - - # Check for continuation lines - i += 1 - while i < len(lines): - next_line = lines[i] - next_stripped = next_line.strip() - - if not next_stripped.startswith('|'): - break - - # Check for second table start - if '|| Subsystem || Report/Notes ||' in next_line: - break - - next_parts = parse_table_row(next_line) - next_vote, next_subsystem, next_runtime, next_comment = next_parts[0], next_parts[1], next_parts[2], next_parts[3] - - # Stop at new data row - if next_vote in VOTE_EMOJI: - break - - # If vote and subsystem are empty, check if it's a continuation - if not next_vote and not next_subsystem: - # If there's a comment, it's a continuation - if next_comment: - comment_parts.append(next_comment) - i += 1 - # If there's only runtime, it's a standalone total time row - elif next_runtime and is_runtime(next_runtime): - break - else: - i += 1 - else: - break - - comment_text = ' '.join(comment_parts) - content.append(f'| {vote_emoji} | {subsystem} | {runtime} | {comment_text} |\n') - continue - - # Case 3: Other cases, skip + if not stripped.startswith('|'): i += 1 continue - i += 1 - - return content, i - - -SECTION_HEADERS = { - '[ERROR] Failures:': 'failures', - '[ERROR] Errors:': 'errors', - '[WARNING] Flakes:': 'flakes', -} - + vote, subsystem, runtime, comment = parse_table_row(line) -# TODO: Yetus should support this natively, but docker integration with job summaries doesn't seem -# to work out of the box. -def extract_test_results_from_unit_files( - output_dir: Path, -) -> Tuple[List[Tuple[str, List[str]]], List[Tuple[str, List[str]]]]: - """ - Extract failed and flaky test names from patch-unit-*.txt files. - - Parses Maven surefire summary sections: - [ERROR] Failures: - assertion failures - [ERROR] Errors: - exceptions thrown during test execution - [WARNING] Flakes: - tests that failed on some runs but passed on retry - - Returns: - Tuple of (failed_tests, flaky_tests) where each is - List of (module_name, [test_names]) tuples - """ - all_failed = [] - all_flaky = [] - - for unit_file in output_dir.glob('patch-unit-*.txt'): - module_name = unit_file.stem.replace('patch-unit-', '') - failed_tests = set() - flaky_tests = set() - - with open(unit_file, 'r') as f: - current_section = None - for line in f: - stripped = line.strip() - - if stripped in SECTION_HEADERS: - current_section = SECTION_HEADERS[stripped] - continue - - if stripped.startswith('[ERROR] Tests run:'): - current_section = None - continue - - if current_section is None: - continue - - if re.match(r'\[(ERROR|INFO)]\s+Run \d+:', stripped): - continue - if stripped.startswith('[INFO]') or not stripped: - continue - - if current_section in ('failures', 'errors'): - if stripped.startswith('[ERROR]'): - test_name = stripped[len('[ERROR]'):].strip() - if test_name and '.' in test_name: - failed_tests.add(test_name) - elif current_section == 'flakes': - if stripped.startswith('[WARNING]'): - test_name = stripped[len('[WARNING]'):].strip() - if test_name and '.' in test_name: - flaky_tests.add(test_name) - - if failed_tests: - all_failed.append((module_name, sorted(failed_tests))) - if flaky_tests: - all_flaky.append((module_name, sorted(flaky_tests))) - - return all_failed, all_flaky - - -def _format_test_table( - heading: str, tests: List[Tuple[str, List[str]]], column_name: str -) -> List[str]: - if not tests: - return [] - - content = [] - content.append(f'\n## {heading}\n\n') - content.append(f'| Module | {column_name} |\n') - content.append('|--------|-------------|\n') - - for module_name, names in tests: - for name in names: - content.append(f'| {module_name} | {name} |\n') + # Section header (vote and subsystem are empty) + if not vote and not subsystem: + if comment: + content.append(f'| | | | {comment} |\n') + elif runtime and is_runtime(runtime): + content.append(f'| | | {runtime} | |\n') + i += 1 + continue - return content + # Data row with vote + if vote in VOTE_EMOJI: + vote_emoji = convert_vote(vote) + comment_parts = [comment] if comment else [] + continuation_parts, i = collect_continuation_lines(lines, i + 1) + comment_parts.extend(continuation_parts) -def format_failed_tests_section(failed_tests: List[Tuple[str, List[str]]]) -> List[str]: - return _format_test_table('❌ Failed Tests', failed_tests, 'Failed Tests') + comment_text = ' '.join(comment_parts) + content.append(f'| {vote_emoji} | {subsystem} | {runtime} | {comment_text} |\n') + continue + # Other cases, skip + i += 1 -def format_flaky_tests_section(flaky_tests: List[Tuple[str, List[str]]]) -> List[str]: - return _format_test_table('⚠️ Flaky Tests (passed on retry)', flaky_tests, 'Flaky Tests') + return content, i def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]: @@ -273,15 +414,15 @@ def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], i Process the second table (Subsystem, Report/Notes). Returns: - Tuple of (markdown lines, next index to process) + Tuple of (Markdown lines, next index to process) """ - content = [] - i = start_idx + content = [ + '\n## Subsystem Reports\n\n', + '| Subsystem | Report/Notes |\n', + '|-----------|------------|\n' + ] - # Add table header - content.append('\n## Subsystem Reports\n\n') - content.append('| Subsystem | Report/Notes |\n') - content.append('|-----------|------------|\n') + i = start_idx # Skip the original separator line if i < len(lines) and '===' in lines[i]: @@ -294,7 +435,7 @@ def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], i if not stripped.startswith('|'): break - # Split by | and get non-empty parts (at least 2) + # Split by | and get non-empty parts parts = [p.strip() for p in stripped.split('|') if p.strip()] if len(parts) >= 2: content.append(f'| {parts[0]} | {parts[1]} |\n') @@ -304,60 +445,54 @@ def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], i return content, i -def convert_console_to_markdown(input_file: str, output_file: Optional[str] = None) -> str: - """Convert console to Markdown format.""" - input_path = Path(input_file) - output_dir = input_path.parent +def convert_console_to_markdown(input_dir: str, output_file: Optional[str] = None) -> str: + """Convert Yetus console output to Markdown format.""" + input_path = Path(input_dir) - with open(input_file, 'r') as f: + if not input_path.is_dir(): + print(f'Error: Input path "{input_dir}" is not a directory', file=sys.stderr) + sys.exit(1) + + console_file = input_path / 'console.txt' + if not console_file.exists(): + print(f'Error: console.txt not found in "{input_dir}"', file=sys.stderr) + sys.exit(1) + + with open(console_file, 'r') as f: lines = f.readlines() content = [] i = 0 - added_failed_tests = False while i < len(lines): line = lines[i] stripped = line.strip() - # Handle overall line if stripped == '-1 overall': content.append(f'

❌ {stripped}

\n') i += 1 - continue - - if stripped == '+1 overall': + elif stripped == '+1 overall': content.append(f'

✅ {stripped}

\n') i += 1 - continue - - # Detect first table start - if '| Vote |' in line and 'Subsystem' in line: + elif '| Vote |' in line and 'Subsystem' in line: table_content, i = process_first_table(lines, i + 1) content.extend(table_content) - # Extract and add failed tests from patch-unit-*.txt files - if not added_failed_tests: - failed_tests, flaky_tests = extract_test_results_from_unit_files(output_dir) - content.extend(format_failed_tests_section(failed_tests)) - content.extend(format_flaky_tests_section(flaky_tests)) - added_failed_tests = True - continue - - # Detect second table start - if '|| Subsystem || Report/Notes ||' in line: + counts, details = aggregate_failed_tests(input_path) + if counts: + content.extend(generate_failed_tests_table(counts, details)) + elif '|| Subsystem || Report/Notes ||' in line: table_content, i = process_second_table(lines, i + 1) content.extend(table_content) - continue - - i += 1 + else: + i += 1 result = ''.join(content) if output_file: with open(output_file, 'w') as f: f.write(result) - print(f'Converted {input_file} to {output_file}', file=sys.stderr) + print(f'Converted {input_dir} to {output_file}', file=sys.stderr) else: print(result, end='') @@ -366,18 +501,21 @@ def convert_console_to_markdown(input_file: str, output_file: Optional[str] = No def main(): if len(sys.argv) < 2: - print(f'Usage: {sys.argv[0]} [output_file]', file=sys.stderr) + print(f'Usage: {sys.argv[0]} [output_file]', file=sys.stderr) + print( + f' input_directory: Directory containing console.txt and optional patch-unit-*.txt files', + file=sys.stderr) print(f' If output_file is not provided, output goes to stdout', file=sys.stderr) sys.exit(1) - input_file = sys.argv[1] + input_dir = sys.argv[1] output_file = sys.argv[2] if len(sys.argv) > 2 else None - if not Path(input_file).exists(): - print(f'Error: Input file "{input_file}" does not exist', file=sys.stderr) + if not Path(input_dir).exists(): + print(f'Error: Input directory "{input_dir}" does not exist', file=sys.stderr) sys.exit(1) - convert_console_to_markdown(input_file, output_file) + convert_console_to_markdown(input_dir, output_file) if __name__ == '__main__':