#!/usr/bin/env python3
# Origin: commit 7cc33b95e61eccf7618b73b095e9eb98850ba3a1 ("Add zero scanner",
# Patrick Toal, 2026-04-30), file misc_tools/scan_zeros.py.
"""
Scan for zeroed regions on the raw disks (via RAID0 striping).

Scans the md0 virtual byte space in the vicinity of LV_START
(where the ext4 filesystem begins) and reports contiguous zero regions.

Reads at SCAN_BLOCK granularity, reports transitions zero→nonzero and vice versa.

Usage:
    sudo python3 misc_tools/scan_zeros.py
    sudo python3 misc_tools/scan_zeros.py --start-mb 0 --end-mb 50   # whole pre-LV area
    sudo python3 misc_tools/scan_zeros.py --full-lv                  # scan entire ext4 FS
"""

import argparse
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Member disks; the list index is used as the stripe position in md0_read().
DISKS = ['/dev/sda', '/dev/sde', '/dev/sdd', '/dev/sdc']
NUM_DISKS = len(DISKS)
CHUNK_BYTES = 128 * 512            # 64 KB stripe chunk
DISK_DATA_START = 0                # byte offset of array data on each member disk

LV_START_BYTES = 5120000 * 512     # 2,621,440,000 (~2.44 GiB into md0)
LV_SIZE_BYTES = 9365766144 * 512   # 4,795,272,265,728 (~4.36 TiB, full ext4 virtual size)

SCAN_BLOCK = 64 * 1024             # granularity: 64 KB (one stripe chunk)

# Open binary file objects, one per entry of DISKS, in the same order.
DISK_FDS = []


def md0_read(md0_offset: int, length: int) -> bytes:
    """Read ``length`` bytes starting at ``md0_offset`` in the striped space.

    The md0 byte space is split into CHUNK_BYTES-sized chunks. Chunk ``k``
    is read from ``DISK_FDS[k % NUM_DISKS]`` at byte offset
    ``DISK_DATA_START + (k // NUM_DISKS) * CHUNK_BYTES``.

    Args:
        md0_offset: Starting byte offset in the md0 virtual space.
        length: Number of bytes to read.

    Returns:
        Exactly ``length`` bytes (``b''`` when ``length`` is not positive).

    Raises:
        OSError: If a member disk returns fewer bytes than requested. The
            missing bytes are never zero-filled, because a zero scanner
            would then report regions that were simply not read.
    """
    pieces = []
    pos = md0_offset
    remaining = length
    while remaining > 0:
        chunk_num, intra = divmod(pos, CHUNK_BYTES)
        chunk_on_disk, disk_idx = divmod(chunk_num, NUM_DISKS)
        # Never cross a chunk boundary in one read: the next chunk lives on
        # a different disk.
        seg_len = min(CHUNK_BYTES - intra, remaining)
        disk_byte = DISK_DATA_START + chunk_on_disk * CHUNK_BYTES + intra

        fd = DISK_FDS[disk_idx]
        fd.seek(disk_byte)
        data = fd.read(seg_len)
        if len(data) != seg_len:
            raise OSError(
                f'short read on disk {disk_idx} at byte {disk_byte}: '
                f'wanted {seg_len}, got {len(data)}'
            )
        pieces.append(data)

        pos += seg_len
        remaining -= seg_len
    return b''.join(pieces)


def fmt_bytes(b):
    """Format a byte count using 1024-based units labelled KB, MB or GB."""
    if b >= 1024**3:
        return f'{b/1024**3:.3f} GB'
    if b >= 1024**2:
        return f'{b/1024**2:.2f} MB'
    return f'{b/1024:.1f} KB'


def fmt_md0(md0_off):
    """Show md0 offset both in absolute bytes and relative to LV_START."""
    rel = md0_off - LV_START_BYTES
    sign = '+' if rel >= 0 else '-'
    return f'md0:{md0_off} (LV_START{sign}{fmt_bytes(abs(rel))})'


def scan(start_md0: int, end_md0: int, label: str):
    """Scan ``[start_md0, end_md0)`` of the md0 space for all-zero blocks.

    The range is read in SCAN_BLOCK-sized steps. Adjacent all-zero blocks
    are merged into runs, and each run is printed as soon as it ends.

    Args:
        start_md0: First md0 byte offset to scan (inclusive).
        end_md0: md0 byte offset at which to stop (exclusive).
        label: Human-readable name of the range, used in the header line.

    Returns:
        A list of ``(run_start, run_end)`` md0 offsets, ``run_end`` exclusive.

    Raises:
        ValueError: If SCAN_BLOCK is not positive (the loop could not advance).
    """
    if SCAN_BLOCK <= 0:
        raise ValueError(f'SCAN_BLOCK must be positive, got {SCAN_BLOCK}')

    print(f'\nScanning {label}')
    print(f' Range: {fmt_bytes(start_md0)} – {fmt_bytes(end_md0)} in md0')
    print(f' ({fmt_bytes(end_md0 - start_md0)} total, {SCAN_BLOCK//1024}KB blocks)')
    print()

    run_start = None   # start of current zero run, or None
    zero_runs = []
    read_errors = 0
    last_report = start_md0
    progress_step = 64 * 1024 * 1024

    pos = start_md0
    while pos < end_md0:
        length = min(SCAN_BLOCK, end_md0 - pos)
        try:
            data = md0_read(pos, length)
        except (OSError, ValueError) as e:
            print(f' READ ERROR at {fmt_md0(pos)}: {e}')
            read_errors += 1
            # An unreadable block is not known to be zero, so it must end
            # any open zero run rather than be absorbed into it.
            is_zero = False
        else:
            is_zero = not any(data)

        if is_zero:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            zero_runs.append((run_start, pos))
            print(f' ZERO {fmt_md0(run_start)} len={fmt_bytes(pos - run_start)}')
            run_start = None

        # Progress every 64 MB
        if pos - last_report >= progress_step:
            pct = 100 * (pos - start_md0) / (end_md0 - start_md0)
            print(f' ... {fmt_bytes(pos)} ({pct:.0f}%)', flush=True)
            last_report = pos

        pos += length

    if run_start is not None:
        zero_runs.append((run_start, end_md0))
        print(f' ZERO {fmt_md0(run_start)} len={fmt_bytes(end_md0 - run_start)}'
              ' (extends to scan end)')

    print()
    if zero_runs:
        total_zero = sum(e - s for s, e in zero_runs)
        print(f' Summary: {len(zero_runs)} zero region(s), {fmt_bytes(total_zero)} total')
    else:
        print(' Summary: no zero regions found in this range')
    if read_errors:
        print(f' Read errors: {read_errors} block(s) could not be read')

    return zero_runs


def main():
    """Parse the command line, open the member disks and run one scan.

    Arguments are validated before any disk is opened. The disk handles are
    closed on every exit path, including a failed ``open()`` and Ctrl-C.
    """
    global DISK_DATA_START, SCAN_BLOCK

    parser = argparse.ArgumentParser()
    parser.add_argument('--disk-offset', type=int, default=0)
    parser.add_argument('--start-mb', type=float, default=None,
                        help='scan start as MD0 offset in MB (default: 64MB before LV_START)')
    parser.add_argument('--end-mb', type=float, default=None,
                        help='scan end as MD0 offset in MB (default: LV_START + 64MB)')
    parser.add_argument('--full-lv', action='store_true',
                        help='scan the entire ext4 LV (slow — ~4.5TB)')
    parser.add_argument('--block-kb', type=int, default=64,
                        help='scan block size in KB (default 64)')
    args = parser.parse_args()

    # A non-positive block size would make the scan loop spin forever.
    if args.block_kb <= 0:
        parser.error('--block-kb must be a positive integer')
    if args.disk_offset < 0:
        parser.error('--disk-offset must not be negative')

    if args.full_lv:
        start = LV_START_BYTES
        end = LV_START_BYTES + LV_SIZE_BYTES
        label = 'full ext4 LV'
    else:
        if args.start_mb is not None:
            start = int(args.start_mb * 1024 * 1024)
        else:
            # Default: 64MB before LV_START to catch anything before the FS
            start = max(0, LV_START_BYTES - 64 * 1024 * 1024)

        if args.end_mb is not None:
            end = int(args.end_mb * 1024 * 1024)
        else:
            # Default: 64MB into the ext4 FS (covers first ~16 block groups)
            end = LV_START_BYTES + 64 * 1024 * 1024
        label = 'LV boundary region'

        if start < 0:
            parser.error('--start-mb must not be negative')
        if end <= start:
            parser.error('scan end must be greater than scan start')

    DISK_DATA_START = args.disk_offset
    SCAN_BLOCK = args.block_kb * 1024

    print('Opening disks (read-only)...')
    try:
        for path in DISKS:
            DISK_FDS.append(open(path, 'rb'))
            print(f' {path} OK')
        scan(start, end, label)
    finally:
        # Close whatever was opened, even if a later open() or the scan failed.
        while DISK_FDS:
            DISK_FDS.pop().close()


if __name__ == '__main__':
    main()