#!/usr/bin/env python3
"""
Scan for zeroed regions on the raw disks (via RAID0 striping).

Scans the md0 virtual byte space in the vicinity of LV_START (where the ext4
filesystem begins) and reports contiguous zero regions.

Reads at SCAN_BLOCK granularity, reports transitions zero→nonzero and vice
versa.

Usage:
    sudo python3 misc_tools/scan_zeros.py
    sudo python3 misc_tools/scan_zeros.py --start-mb 0 --end-mb 50   # whole pre-LV area
    sudo python3 misc_tools/scan_zeros.py --full-lv                  # scan entire ext4 FS
"""

import argparse
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Member disks in stripe order: md0 chunk k lives on DISKS[k % NUM_DISKS].
DISKS = ['/dev/sda', '/dev/sde', '/dev/sdd', '/dev/sdc']
NUM_DISKS = len(DISKS)
CHUNK_BYTES = 128 * 512             # 64 KB
DISK_DATA_START = 0                 # byte offset added to every per-disk position
LV_START_BYTES = 5120000 * 512      # 2,621,440,000 (~2.44 GB into md0)
LV_SIZE_BYTES = 9365766144 * 512    # 4,795,272,265,728 (~4.36 TiB, full ext4 virtual size)

SCAN_BLOCK = 64 * 1024              # granularity: 64 KB (one stripe chunk)

# Open binary file objects, one per entry of DISKS, filled in by main().
DISK_FDS = []


def md0_read(md0_offset: int, length: int) -> bytes:
    """Read ``length`` bytes at ``md0_offset`` of the striped md0 byte space.

    The range is split at CHUNK_BYTES boundaries. Chunk ``k`` is read from
    ``DISK_FDS[k % NUM_DISKS]`` at byte
    ``DISK_DATA_START + (k // NUM_DISKS) * CHUNK_BYTES`` plus the offset
    inside the chunk.

    Raises:
        ValueError: if ``md0_offset`` or ``length`` is negative.
        EOFError: if a disk returns fewer bytes than requested. The missing
            bytes must not be left as zeros in the result, because the
            caller would then report them as a zeroed region.
        OSError: propagated from the underlying seek/read.
    """
    if md0_offset < 0 or length < 0:
        raise ValueError(
            f'md0_offset and length must be non-negative '
            f'(got {md0_offset}, {length})')

    result = bytearray(length)
    pos = md0_offset
    remaining = length
    while remaining > 0:
        chunk_num, intra = divmod(pos, CHUNK_BYTES)
        chunk_on_disk, disk_idx = divmod(chunk_num, NUM_DISKS)
        # Never read across a chunk boundary: the next chunk is on another disk.
        seg_len = min(CHUNK_BYTES - intra, remaining)
        disk_byte = DISK_DATA_START + chunk_on_disk * CHUNK_BYTES + intra

        fd = DISK_FDS[disk_idx]
        fd.seek(disk_byte)
        data = fd.read(seg_len)
        if len(data) != seg_len:
            raise EOFError(
                f'short read on disk {disk_idx}: wanted {seg_len} bytes at '
                f'disk offset {disk_byte}, got {len(data)}')

        dst_off = pos - md0_offset
        result[dst_off:dst_off + seg_len] = data
        pos += seg_len
        remaining -= seg_len
    return bytes(result)


def fmt_bytes(b):
    """Format a byte count as GB, MB or KB (binary multiples of 1024)."""
    if b >= 1024**3:
        return f'{b/1024**3:.3f} GB'
    if b >= 1024**2:
        return f'{b/1024**2:.2f} MB'
    return f'{b/1024:.1f} KB'


def fmt_md0(md0_off):
    """Show md0 offset both in absolute bytes and relative to LV_START."""
    rel = md0_off - LV_START_BYTES
    sign = '+' if rel >= 0 else '-'
    return f'md0:{md0_off} (LV_START{sign}{fmt_bytes(abs(rel))})'


def _record_zero_run(zero_runs, run_start, run_end, note=''):
    """Append the run ``[run_start, run_end)`` to ``zero_runs`` and print it."""
    zero_runs.append((run_start, run_end))
    print(f' ZERO {fmt_md0(run_start)} len={fmt_bytes(run_end - run_start)}{note}')


def scan(start_md0: int, end_md0: int, label: str):
    """Scan md0 bytes ``[start_md0, end_md0)`` and report all-zero regions.

    The range is read in SCAN_BLOCK-sized pieces; a piece counts as zero only
    when every byte in it is zero. Consecutive zero pieces are merged into a
    single run. A piece that cannot be read is reported as a read error and
    terminates any open run, so unreadable data is never counted as zero.

    Returns:
        A list of ``(start, end)`` md0 byte offsets, one per zero run, with
        ``end`` exclusive.

    Raises:
        ValueError: if SCAN_BLOCK is not positive (the loop could not advance).
    """
    if SCAN_BLOCK <= 0:
        raise ValueError(f'SCAN_BLOCK must be positive, got {SCAN_BLOCK}')

    print(f'\nScanning {label}')
    print(f' Range: {fmt_bytes(start_md0)} – {fmt_bytes(end_md0)} in md0')
    print(f' ({fmt_bytes(end_md0 - start_md0)} total, {SCAN_BLOCK//1024}KB blocks)')
    print()

    run_start = None  # start of current zero run, or None
    zero_runs = []
    last_report = start_md0
    pos = start_md0

    while pos < end_md0:
        length = min(SCAN_BLOCK, end_md0 - pos)
        try:
            data = md0_read(pos, length)
        except (OSError, EOFError) as e:
            # Contents of this block are unknown: end the run here.
            if run_start is not None:
                _record_zero_run(zero_runs, run_start, pos)
                run_start = None
            print(f' READ ERROR at {fmt_md0(pos)}: {e}')
            pos += length
            continue

        is_zero = not any(data)
        if is_zero:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            _record_zero_run(zero_runs, run_start, pos)
            run_start = None

        # Progress every 64 MB
        if pos - last_report >= 64 * 1024 * 1024:
            pct = 100 * (pos - start_md0) / (end_md0 - start_md0)
            print(f' ... {fmt_bytes(pos)} ({pct:.0f}%)', flush=True)
            last_report = pos

        pos += length

    if run_start is not None:
        _record_zero_run(zero_runs, run_start, end_md0,
                         note=' (extends to scan end)')

    print()
    if zero_runs:
        total_zero = sum(e - s for s, e in zero_runs)
        print(f' Summary: {len(zero_runs)} zero region(s), {fmt_bytes(total_zero)} total')
    else:
        print(' Summary: no zero regions found in this range')

    return zero_runs


def main():
    """Parse the command line, open the member disks and run one scan.

    All arguments are validated before any disk is opened, and the disks are
    closed again even if opening one of them fails or the scan is interrupted.
    """
    global DISK_DATA_START, SCAN_BLOCK

    parser = argparse.ArgumentParser()
    parser.add_argument('--disk-offset', type=int, default=0)
    parser.add_argument('--start-mb', type=float, default=None,
                        help='scan start as MD0 offset in MB (default: 64MB before LV_START)')
    parser.add_argument('--end-mb', type=float, default=None,
                        help='scan end as MD0 offset in MB (default: LV_START + 64MB)')
    parser.add_argument('--full-lv', action='store_true',
                        help='scan the entire ext4 LV (slow — ~4.5TB)')
    parser.add_argument('--block-kb', type=int, default=64,
                        help='scan block size in KB (default 64)')
    args = parser.parse_args()

    # A non-positive block size would keep the scan loop from ever advancing.
    if args.block_kb <= 0:
        parser.error('--block-kb must be a positive integer')
    if args.disk_offset < 0:
        parser.error('--disk-offset must be non-negative')

    if args.full_lv:
        start = LV_START_BYTES
        end = LV_START_BYTES + LV_SIZE_BYTES
        label = 'full ext4 LV'
    else:
        if args.start_mb is not None:
            start = int(args.start_mb * 1024 * 1024)
        else:
            # Default: 64MB before LV_START to catch anything before the FS
            start = max(0, LV_START_BYTES - 64 * 1024 * 1024)
        if args.end_mb is not None:
            end = int(args.end_mb * 1024 * 1024)
        else:
            # Default: 64MB into the ext4 FS (covers first ~16 block groups)
            end = LV_START_BYTES + 64 * 1024 * 1024
        label = 'LV boundary region'

    if start < 0 or end <= start:
        parser.error(f'empty or invalid scan range: start={start} end={end} (bytes)')

    DISK_DATA_START = args.disk_offset
    SCAN_BLOCK = args.block_kb * 1024

    print('Opening disks (read-only)...')
    try:
        for path in DISKS:
            DISK_FDS.append(open(path, 'rb'))
            print(f' {path} OK')
        scan(start, end, label)
    finally:
        # Runs on open() failure and on Ctrl-C during a long scan as well.
        for fd in DISK_FDS:
            fd.close()
        DISK_FDS.clear()


if __name__ == '__main__':
    main()