Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from collections import OrderedDict
from datetime import datetime, timezone
from functools import partial
from hashlib import sha256
from string import Formatter

from ..logger import create_logger
Expand Down Expand Up @@ -876,14 +877,15 @@ class ItemFormatter(BaseFormatter):
"isoctime": "file change time (ISO 8601 format)",
"isoatime": "file access time (ISO 8601 format)",
"xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
"fingerprint": "Fingerprint of the file content (may have false negatives), format: H(conditions)-H(chunk_ids)",
"archiveid": "internal ID of the archive",
"archivename": "name of the archive",
}
KEY_GROUPS = (
("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "inode", "flags"),
("size", "num_chunks"),
("mtime", "ctime", "atime", "isomtime", "isoctime", "isoatime"),
tuple(sorted(hash_algorithms)),
tuple(["fingerprint"] + sorted(hash_algorithms)),
("archiveid", "archivename", "extra"),
)

Expand All @@ -903,6 +905,15 @@ def __init__(self, archive, format):
self.archive = archive
# track which keys were requested in the format string
self.format_keys = {f[1] for f in Formatter().parse(format)}

# we want a hash over the conditions that influence the chunk ID list for a given file content:
# - the id algorithm and key
# - the chunker seed (if any - buzhash64 derives seed from id_key)
# - the chunker params
key = archive.key
conditions = f"{key.TYPE_STR!r}{key.id_key!r}{key.chunk_seed!r}{archive.metadata.get('chunker_params')!r}"
self.conditions_hash = sha256(conditions.encode()).hexdigest()

self.call_keys = {
"size": self.calculate_size,
"num_chunks": self.calculate_num_chunks,
Expand All @@ -912,6 +923,7 @@ def __init__(self, archive, format):
"mtime": partial(self.format_time, "mtime"),
"ctime": partial(self.format_time, "ctime"),
"atime": partial(self.format_time, "atime"),
"fingerprint": self.calculate_fingerprint,
}
for hash_function in self.hash_algorithms:
self.call_keys[hash_function] = partial(self.hash_item, hash_function)
Expand Down Expand Up @@ -963,6 +975,16 @@ def calculate_size(self, item):
# note: does not support hard link slaves, they will be size 0
return item.get_size()

def calculate_fingerprint(self, item):
    """Return a fast fingerprint of the item's file content.

    Format: ``<conditions_hash[:16]>-<sha256(chunk ids)>``. Items without
    a chunk list (directories, symlinks, ...) yield an empty string.
    """
    chunks = item.get("chunks")
    if chunks is None:
        # no content chunks -> nothing to fingerprint
        return ""
    content_digest = sha256()
    for chunk in chunks:
        content_digest.update(chunk.id)
    # we do not encounter many different conditions hashes, so the collision probability is low.
    # thus, we can keep it short and only return 64 bits from the conditions hash.
    return f"{self.conditions_hash[:16]}-{content_digest.hexdigest()}"

def hash_item(self, hash_function, item):
if "chunks" not in item:
return ""
Expand Down
58 changes: 58 additions & 0 deletions src/borg/testsuite/archiver/list_cmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,61 @@ def test_list_inode_hardlinks(archivers, request):
assert inodes["input/fileA"] != inodes["input/fileC"]
else:
pytest.skip("Platform does not provide inode numbers for items")


def test_fingerprint(archivers, request):
    """The {fingerprint} format key is stable for identical content and chunking
    conditions, and changes when either the content or the chunker params change."""
    archiver = request.getfixturevalue(archivers)

    def list_fingerprints(archive_name):
        # parse "fingerprint path" lines from `borg list` into {path: fingerprint}
        output = cmd(archiver, "list", archive_name, "--format={fingerprint} {path}{NL}")
        fingerprints = {}
        for line in output.splitlines():
            fp, path = line.split(" ", 1)
            fingerprints[path] = fp
        return fingerprints

    cmd(archiver, "repo-create", RK_ENCRYPTION)
    create_regular_file(archiver.input_path, "file1", contents=b"content")
    create_regular_file(archiver.input_path, "file2", contents=b"other")
    cmd(archiver, "create", "test1", "input")
    fingerprints1 = list_fingerprints("test1")

    # Same content, same chunker params -> same fingerprint
    cmd(archiver, "create", "test2", "input")
    assert fingerprints1 == list_fingerprints("test2")

    # Modified content -> different fingerprint
    create_regular_file(archiver.input_path, "file1", contents=b"modification")
    cmd(archiver, "create", "test3", "input")
    fingerprints3 = list_fingerprints("test3")
    assert fingerprints1["input/file1"] != fingerprints3["input/file1"]
    # Unmodified file should still match
    assert fingerprints1["input/file2"] == fingerprints3["input/file2"]

    # Different chunker params -> different fingerprint
    # We can use the same repo but specify different chunker params for a new archive.
    # Even unmodified files should have different fingerprints because conditions_hash changed.
    cmd(archiver, "create", "--chunker-params=fixed,4096", "test4", "input")
    assert fingerprints1["input/file2"] != list_fingerprints("test4")["input/file2"]

    # Also try with buzhash64: conditions_hash changes, so unmodified files differ too.
    cmd(archiver, "create", "--chunker-params=buzhash64,10,23,16,4095", "test5", "input")
    assert fingerprints1["input/file2"] != list_fingerprints("test5")["input/file2"]
Loading