#!/usr/bin/env python3
"""
Stage 2 – Identify orphaned directory trees from the inode database.

A directory is orphaned when its '..' parent inode either:
  • is not recorded at all (reported with parent 0)
  • is self-referential (inum == parent inum)
  • points into the zeroed/damaged region of the disk
  • does not exist in the inode table at all

A "true root" is an orphan whose own parent is NOT itself an orphan,
i.e. it is the topmost detached node of a subtree.  A self-referential
orphan has no parent other than itself and therefore counts as a true root.

No device access required – reads only from the SQLite database
produced by scan_inodes.py.

Usage:
    python3 find_orphans.py [options]

Options:
    --db PATH           SQLite database from scan_inodes.py  [inodes.db]
    --output FILE       Write inode list here                [orphan_roots.txt]
                        ('-' prints the table to stdout instead of a file)
    --include-deleted   Also include directories with dtime set (deleted)
    --list              Print results to stdout in addition to the file
"""

import argparse
import datetime
import sys

import ext4db


def find_orphan_roots(db, include_deleted=False):
    """Return list of (inum, parent_inum, status, reason) for true orphan roots.

    Args:
        db: Database handle as returned by ``ext4db.open_db``.
        include_deleted: Forwarded to ``ext4db.get_all_dir_inums``; when true,
            deleted directories are considered as well.

    Returns:
        A list of ``(inum, parent_inum, status, reason)`` tuples sorted by
        inode number.  ``reason`` is one of ``'no-dotdot'``,
        ``'self-referential'``, ``'parent-in-zeroed-region'`` or
        ``'parent-missing'``; ``status`` is the inode row's ``status`` column,
        or ``'unknown'`` when the inode row cannot be fetched.
    """
    # NOTE(review): the third argument looks like a fallback used when the
    # key is absent from the metadata table — confirm against ext4db.
    zeroed_groups = ext4db.get_fs_meta_int(db, 'zeroed_groups', 13)
    inodes_per_group = ext4db.get_fs_meta_int(db, 'inodes_per_group', 8192)

    # Highest inode number in the zeroed region
    zeroed_max_inum = zeroed_groups * inodes_per_group

    dir_inums = set(ext4db.get_all_dir_inums(db, include_deleted=include_deleted))

    orphan_roots = []
    for inum in dir_inums:
        dot = ext4db.get_dot(db, inum)
        dotdot = ext4db.get_dotdot(db, inum)

        # Require that '.' points back to this inode — strongest confirmation
        # that we read a real directory block (not garbage or a false-positive
        # inode slot where mode & 0xF000 == 0x4000 by coincidence).
        if dot != inum:
            continue

        if dotdot is None:
            # No parent recorded: report 0 as a placeholder parent inode.
            orphan_roots.append((inum, 0, 'no-dotdot'))
            continue

        if dotdot == inum:
            orphan_roots.append((inum, dotdot, 'self-referential'))
        elif dotdot <= zeroed_max_inum:
            orphan_roots.append((inum, dotdot, 'parent-in-zeroed-region'))
        elif dotdot not in dir_inums:
            orphan_roots.append((inum, dotdot, 'parent-missing'))

    # Keep only true roots: orphans whose declared parent is not itself an
    # orphan.  A self-referential orphan is its own parent and is always in
    # orphan_set, so it must be accepted explicitly; otherwise every
    # 'self-referential' entry would be filtered out here.
    orphan_set = {inum for inum, _, _ in orphan_roots}
    true_roots = [
        (inum, parent, reason)
        for inum, parent, reason in orphan_roots
        if parent == inum or parent not in orphan_set
    ]

    # Attach inode status
    result = []
    for inum, parent, reason in sorted(true_roots):
        row = ext4db.get_inode(db, inum)
        status = row['status'] if row else 'unknown'
        result.append((inum, parent, status, reason))

    return result


def format_dtime(db, inum):
    """Return the deletion time of inode *inum* as a printable string.

    Returns ``'never'`` when the inode row is missing or its ``dtime`` is
    unset/zero, a ``'%Y-%m-%d %H:%M:%S'`` local-time string when the value
    converts cleanly, and the raw value as a string when it cannot be
    converted to a date.
    """
    row = ext4db.get_inode(db, inum)
    if not row or not row['dtime']:
        return 'never'
    try:
        return datetime.datetime.fromtimestamp(row['dtime']).strftime('%Y-%m-%d %H:%M:%S')
    except (OSError, OverflowError, ValueError):
        # Garbage dtime values can be out of range for the platform or for
        # datetime's supported years; fall back to the raw number.
        return str(row['dtime'])


def main():
    """Parse the command line, compute orphan roots and emit the results."""
    parser = argparse.ArgumentParser(description='Find orphaned directory roots (Stage 2)')
    parser.add_argument('--db', default='inodes.db')
    parser.add_argument('--output', default='orphan_roots.txt')
    parser.add_argument('--include-deleted', action='store_true')
    parser.add_argument('--list', action='store_true', help='Print table to stdout')
    args = parser.parse_args()

    db = ext4db.open_db(args.db)
    roots = find_orphan_roots(db, include_deleted=args.include_deleted)

    # '--output -' means "stdout only": print the table and skip the file.
    if args.list or args.output == '-':
        print(f"{'inode':>12} {'parent':>12} {'status':>12} {'deleted':>19} reason")
        print('-' * 80)
        for inum, parent, status, reason in roots:
            dt = format_dtime(db, inum)
            print(f"{inum:>12} {parent:>12} {status:>12} {dt:>19} {reason}")
        print(f"\nTotal orphan roots: {len(roots)}")

    if args.output and args.output != '-':
        with open(args.output, 'w') as fh:
            for inum, parent, status, reason in roots:
                fh.write(f"{inum} {parent} {status} {reason}\n")
        print(f"Wrote {len(roots)} orphan roots to {args.output}")


if __name__ == '__main__':
    main()