#!/usr/bin/env python
"""Consolidate duplicated library files across architectures using hard links.

Scans ``src/lib/arch/<arch>/**`` and replaces files that have the same
architecture-relative name and the same content with hard links to one copy.

Many files under the library directory are duplicated because most
architectures share a lot in common. Maintaining a modification across all
possible architectures is tedious, so this tool hard-links all identical
files: modifying one is modifying all of them. Git is agnostic to this.
"""

# Provenance: introduced as tools/consolidate-libs.py by commit
# 06615bdb1969d9f13d65cbbc755e1303dbe35d6a (Jose Rodriguez, 2025-03-29),
# "feat: adds a tool to consolidate (hard-link) duplicated files".

import glob
import hashlib
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import NamedTuple

# Directory whose immediate sub-directories are the per-architecture trees.
ROOT_DIR = Path(__file__).parent.parent.absolute() / "src" / "lib" / "arch"
# Known architecture names. Not referenced by the functions in this module:
# the scan covers every sub-directory found under the root.
ARCHS = "zx48k", "zxnext"


class FileInfo(NamedTuple):
    """Grouping key: files sharing both fields are considered duplicates."""

    # Path relative to the architecture directory, "/"-separated.
    path: str
    # Integer digest of the file content (SHA-256 interpreted as an int).
    hash: int


def _content_hash(path: str) -> int:
    """Return the SHA-256 digest of the file at *path* as an integer.

    The file is read in chunks inside a ``with`` block so the handle is
    always closed and large files are not loaded into memory at once.
    """
    sha = hashlib.sha256()
    with open(path, "rb") as handle:
        while chunk := handle.read(65536):
            sha.update(chunk)
    return int.from_bytes(sha.digest(), "big")


def _replace_with_hardlink(source: str, target: str) -> None:
    """Make *target* a hard link to *source* without ever losing *target*.

    The link is first created under a temporary name next to *target* and
    then renamed over it. If creating the link fails (cross-device link,
    permissions, ...) the original *target* is left untouched.

    Raises:
        OSError: if the link cannot be created or renamed into place.
    """
    temp_name = f"{target}.hardlink-tmp-{os.getpid()}"
    os.link(source, temp_name)
    try:
        os.replace(temp_name, target)
    finally:
        # After a successful replace the temporary name is gone; it only
        # survives when the rename failed or was a no-op.
        if os.path.lexists(temp_name):
            os.unlink(temp_name)


def get_file_list(root: Path) -> list[str]:
    """Return every regular file found recursively under *root*, sorted.

    *root* is escaped so that glob metacharacters in the checkout path are
    taken literally. Sorting makes the choice of the surviving copy in
    ``fold_files`` deterministic.
    """
    pattern = os.path.join(glob.escape(str(root)), "**", "*")
    return sorted(
        name for name in glob.glob(pattern, recursive=True) if os.path.isfile(name)
    )


def scan_arch(root: Path) -> dict[FileInfo, list[str]]:
    """Group the files under *root* by architecture-relative path and content.

    Args:
        root: directory whose immediate sub-directories are the
            per-architecture trees (``ROOT_DIR`` when run as a script).

    Returns:
        A mapping from ``FileInfo`` to the list of files matching that
        relative path and content digest. Files located directly in *root*
        (outside any architecture directory) are ignored.
    """
    result: dict[FileInfo, list[str]] = defaultdict(list)

    for file in get_file_list(root):
        parts = Path(os.path.relpath(file, root)).parts
        if len(parts) < 2:
            # Not inside an architecture sub-directory.
            continue

        # Drop the leading architecture component; keep "/" as separator so
        # keys do not depend on the platform.
        relative = "/".join(parts[1:])
        result[FileInfo(path=relative, hash=_content_hash(file))].append(file)

    return result


def fold_files(scan: dict[FileInfo, list[str]]) -> None:
    """Replace every duplicate in *scan* with a hard link to the first file.

    Groups with fewer than two files are skipped, and so are files that are
    already the same file as the group's first entry, which makes repeated
    runs a no-op.

    Raises:
        OSError: if a hard link cannot be created; the file being replaced
            is left intact in that case.
    """
    for files in scan.values():
        if len(files) < 2:
            continue

        main_file, *duplicates = files
        for file in duplicates:
            if os.path.samefile(main_file, file):
                continue

            print(f"Linking {main_file} to {file}")
            _replace_with_hardlink(main_file, file)


def main() -> None:
    """Scan ``ROOT_DIR`` and hard-link the duplicated files found there."""
    scan = scan_arch(ROOT_DIR)
    fold_files(scan)


if __name__ == "__main__":
    main()