Files
ext4recovery/find_orphans.py
2026-04-30 11:04:05 +00:00

119 lines
4.2 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Stage 2 — Identify orphaned directory trees from the inode database.
A directory is orphaned when its '..' parent inode either:
• points into the zeroed/damaged region of the disk
• does not exist in the inode table at all
• is self-referential (inum == parent inum)
A "true root" is an orphan whose own parent is NOT itself an orphan, i.e. it
is the topmost detached node of a subtree.
No device access is required: this stage reads only from the SQLite database
produced by scan_inodes.py.
Usage:
python3 find_orphans.py [options]
Options:
--db PATH SQLite database from scan_inodes.py [inodes.db]
--output FILE Write inode list here [orphan_roots.txt]
--include-deleted Also include directories with dtime set (deleted)
--list Print results to stdout in addition to the file
"""
import argparse, datetime, sys
import ext4db
def find_orphan_roots(db, include_deleted=False):
    """Return list of (inum, parent_inum, status, reason) for true orphan roots.

    A directory is recorded as an orphan when its '..' entry is missing,
    points back at the directory itself, points at an inode number inside
    the zeroed region, or points at an inode that is not a known directory.
    Only "true roots" are returned: orphans whose declared parent is not
    another orphan.

    Args:
        db: Database handle passed through to the ``ext4db`` helpers.
        include_deleted: Forwarded to ``ext4db.get_all_dir_inums``.

    Returns:
        A list of ``(inum, parent_inum, status, reason)`` tuples sorted by
        inode number. ``parent_inum`` is 0 when no '..' entry was found, and
        ``status`` is ``'unknown'`` when the inode row cannot be loaded.
    """
    zeroed_groups = ext4db.get_fs_meta_int(db, 'zeroed_groups', 13)
    inodes_per_group = ext4db.get_fs_meta_int(db, 'inodes_per_group', 8192)
    # Highest inode number in the zeroed region
    zeroed_max_inum = zeroed_groups * inodes_per_group

    dir_inums = set(ext4db.get_all_dir_inums(db, include_deleted=include_deleted))

    orphan_roots = []
    for inum in dir_inums:
        dot = ext4db.get_dot(db, inum)
        dotdot = ext4db.get_dotdot(db, inum)

        # Require that '.' points back to this inode — strongest confirmation
        # that we read a real directory block (not garbage or a false-positive
        # inode slot where mode & 0xF000 == 0x4000 by coincidence).
        if dot != inum:
            continue

        if dotdot is None:
            orphan_roots.append((inum, 0, 'no-dotdot'))
        elif dotdot == inum:
            orphan_roots.append((inum, dotdot, 'self-referential'))
        elif dotdot <= zeroed_max_inum:
            orphan_roots.append((inum, dotdot, 'parent-in-zeroed-region'))
        elif dotdot not in dir_inums:
            orphan_roots.append((inum, dotdot, 'parent-missing'))

    # Keep only true roots: orphans whose declared parent is not itself an
    # orphan. A self-referential orphan names itself as its parent, so it is
    # always a member of orphan_set; without the explicit `parent == inum`
    # exemption every 'self-referential' entry would be filtered out here.
    orphan_set = {inum for inum, _, _ in orphan_roots}
    true_roots = [
        (inum, parent, reason)
        for inum, parent, reason in orphan_roots
        if parent == inum or parent not in orphan_set
    ]

    # Attach inode status
    result = []
    for inum, parent, reason in sorted(true_roots):
        row = ext4db.get_inode(db, inum)
        status = row['status'] if row else 'unknown'
        result.append((inum, parent, status, reason))
    return result
def format_dtime(db, inum):
    """Return the deletion time of inode *inum* as a display string.

    Args:
        db: Database handle passed through to ``ext4db.get_inode``.
        inum: Inode number to look up.

    Returns:
        ``'never'`` when the inode row is missing or its ``dtime`` is falsy
        (zero/NULL); otherwise the dtime rendered in local time as
        ``YYYY-MM-DD HH:MM:SS``. If the stored value cannot be converted to
        a date, the raw value is returned as a string instead.
    """
    row = ext4db.get_inode(db, inum)
    if not row or not row['dtime']:
        return 'never'
    dtime = row['dtime']
    try:
        return datetime.datetime.fromtimestamp(dtime).strftime('%Y-%m-%d %H:%M:%S')
    except (OSError, OverflowError, ValueError):
        # ValueError is raised when the timestamp maps to a year outside
        # datetime's supported range; fall back to the raw number rather
        # than aborting the listing.
        return str(dtime)
def main():
    """Command-line entry point.

    Parses the options, opens the inode database, computes the orphan roots
    and reports them: as a table on stdout when ``--list`` is given or the
    output is ``-``, and as one line per root in the output file otherwise
    (or additionally, when ``--list`` is combined with a file name).
    """
    parser = argparse.ArgumentParser(description='Find orphaned directory roots (Stage 2)')
    parser.add_argument('--db', default='inodes.db')
    parser.add_argument('--output', default='orphan_roots.txt')
    parser.add_argument('--include-deleted', action='store_true')
    parser.add_argument('--list', action='store_true', help='Print table to stdout')
    args = parser.parse_args()

    db = ext4db.open_db(args.db)
    roots = find_orphan_roots(db, include_deleted=args.include_deleted)

    # '-' as the output name means "stdout only": print the table, write no file.
    stdout_only = args.output == '-'

    if args.list or stdout_only:
        print(f"{'inode':>12} {'parent':>12} {'status':>12} {'deleted':>19} reason")
        print('-' * 80)
        for inum, parent, status, reason in roots:
            deleted_at = format_dtime(db, inum)
            print(f"{inum:>12} {parent:>12} {status:>12} {deleted_at:>19} {reason}")
        print(f"\nTotal orphan roots: {len(roots)}")

    if args.output and not stdout_only:
        with open(args.output, 'w') as out_file:
            out_file.writelines(
                f"{inum} {parent} {status} {reason}\n"
                for inum, parent, status, reason in roots
            )
        print(f"Wrote {len(roots)} orphan roots to {args.output}")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()