Initial remote commit

This commit is contained in:
2026-04-30 11:04:05 +00:00
commit b86e4f9a98
103 changed files with 262770 additions and 0 deletions

118
find_orphans.py Executable file
View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Stage 2: Identify orphaned directory trees from the inode database.
A directory is orphaned when its '..' parent inode either:
• points into the zeroed/damaged region of the disk
• does not exist in the inode table at all
• is self-referential (inum == parent inum)
A "true root" is an orphan whose own parent is NOT itself an orphan, i.e. it
is the topmost detached node of a subtree.
No device access required; reads only from the SQLite database produced by
scan_inodes.py.
Usage:
python3 find_orphans.py [options]
Options:
--db PATH SQLite database from scan_inodes.py [inodes.db]
--output FILE Write inode list here [orphan_roots.txt]
--include-deleted Also include directories with dtime set (deleted)
--list Print results to stdout in addition to the file
"""
import argparse, datetime, sys
import ext4db
def find_orphan_roots(db, include_deleted=False):
    """Return the true orphan roots found in the inode database.

    A directory is treated as an orphan when its '.' entry points back to
    itself and its '..' entry is missing, equals its own inode number, is at
    or below the highest inode number of the zeroed region, or names an inode
    that is not among the known directory inodes.

    A true root is an orphan whose declared parent is not itself an orphan.
    A self-referential orphan (parent == inum) is always kept: it names
    itself as parent, so comparing it against the orphan set would otherwise
    discard it together with the subtree hanging below it.

    Args:
        db: Database handle passed through to the ``ext4db`` helpers.
        include_deleted: Forwarded to ``ext4db.get_all_dir_inums``.

    Returns:
        A list of ``(inum, parent_inum, status, reason)`` tuples sorted by
        inode number.  ``parent_inum`` is 0 when no '..' entry was found and
        ``status`` is ``'unknown'`` when the inode row cannot be fetched.
    """
    # The third argument is presumably the fallback used when the key is
    # absent from the filesystem metadata -- TODO confirm against ext4db.
    zeroed_groups = ext4db.get_fs_meta_int(db, 'zeroed_groups', 13)
    inodes_per_group = ext4db.get_fs_meta_int(db, 'inodes_per_group', 8192)
    # Highest inode number in the zeroed region
    zeroed_max_inum = zeroed_groups * inodes_per_group

    dir_inums = set(ext4db.get_all_dir_inums(db, include_deleted=include_deleted))

    orphans = []
    for inum in dir_inums:
        dot = ext4db.get_dot(db, inum)
        dotdot = ext4db.get_dotdot(db, inum)

        # Require that '.' points back to this inode -- strongest confirmation
        # that we read a real directory block (not garbage or a false-positive
        # inode slot where mode & 0xF000 == 0x4000 by coincidence).
        if dot != inum:
            continue

        if dotdot is None:
            orphans.append((inum, 0, 'no-dotdot'))
        elif dotdot == inum:
            # NOTE(review): an intact filesystem root would also match this
            # branch, since its '..' refers to itself -- confirm whether the
            # root can ever be present in the database.
            orphans.append((inum, dotdot, 'self-referential'))
        elif dotdot <= zeroed_max_inum:
            orphans.append((inum, dotdot, 'parent-in-zeroed-region'))
        elif dotdot not in dir_inums:
            orphans.append((inum, dotdot, 'parent-missing'))

    # Keep only true roots: orphans whose declared parent is not itself an
    # orphan.  The explicit `parent == inum` test preserves self-referential
    # orphans, which are by construction members of orphan_set.
    orphan_set = {inum for inum, _, _ in orphans}
    true_roots = [
        (inum, parent, reason)
        for inum, parent, reason in orphans
        if parent == inum or parent not in orphan_set
    ]

    # Attach inode status
    result = []
    for inum, parent, reason in sorted(true_roots):
        row = ext4db.get_inode(db, inum)
        status = row['status'] if row else 'unknown'
        result.append((inum, parent, status, reason))
    return result
def format_dtime(db, inum):
    """Return the deletion time of inode *inum* as display text.

    Args:
        db: Database handle passed through to ``ext4db.get_inode``.
        inum: Inode number to look up.

    Returns:
        ``'never'`` when the inode row is missing or its ``dtime`` is falsy;
        otherwise the ``dtime`` rendered in local time as
        ``'YYYY-MM-DD HH:MM:SS'``.  If the value cannot be converted to a
        date, the raw ``dtime`` is returned as a string instead.
    """
    row = ext4db.get_inode(db, inum)
    if not row or not row['dtime']:
        return 'never'

    dtime = row['dtime']
    try:
        return datetime.datetime.fromtimestamp(dtime).strftime('%Y-%m-%d %H:%M:%S')
    except (OSError, OverflowError, ValueError):
        # fromtimestamp() raises ValueError (year out of range) for values the
        # platform accepts but datetime cannot represent; fall back to the raw
        # number rather than aborting the whole listing.
        return str(dtime)
def main():
    """Command-line entry point: find orphan roots and report them.

    Parses ``--db``, ``--output``, ``--include-deleted`` and ``--list``,
    opens the database through ``ext4db.open_db`` and computes the orphan
    roots.  A table is printed to stdout when ``--list`` is given or the
    output is ``'-'``.  For any other non-empty output path, one
    ``inum parent status reason`` line per root is written to that file.
    """
    cli = argparse.ArgumentParser(description='Find orphaned directory roots (Stage 2)')
    cli.add_argument('--db', default='inodes.db')
    cli.add_argument('--output', default='orphan_roots.txt')
    cli.add_argument('--include-deleted', action='store_true')
    cli.add_argument('--list', action='store_true', help='Print table to stdout')
    opts = cli.parse_args()

    conn = ext4db.open_db(opts.db)
    orphan_rows = find_orphan_roots(conn, include_deleted=opts.include_deleted)

    # '-' as the output path means "stdout only": print the table, no file.
    to_stdout = opts.output == '-'

    if opts.list or to_stdout:
        print(f"{'inode':>12} {'parent':>12} {'status':>12} {'deleted':>19} reason")
        print('-' * 80)
        for inum, parent, status, reason in orphan_rows:
            deleted = format_dtime(conn, inum)
            print(f"{inum:>12} {parent:>12} {status:>12} {deleted:>19} {reason}")
        print(f"\nTotal orphan roots: {len(orphan_rows)}")

    if opts.output and not to_stdout:
        with open(opts.output, 'w') as fh:
            fh.writelines(
                f"{inum} {parent} {status} {reason}\n"
                for inum, parent, status, reason in orphan_rows
            )
        print(f"Wrote {len(orphan_rows)} orphan roots to {opts.output}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()