Files
ext4recovery/test/scan_inodes.py.bak
2026-04-30 11:04:05 +00:00

247 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Scan ext4 filesystem for orphaned directory roots.
Reads inode table directly using geometry from backup superblock.
"""
import struct, sys
from collections import defaultdict
DEV = '/dev/dm-0'        # block device to scan (opened read-only)
BLOCK = 4096             # filesystem block size in bytes -- assumes 4 KiB blocks; TODO confirm against sb
BACKUP_SB_BLOCK = 32768  # first backup superblock (start of group 1 for 4 KiB-block layouts)
def read_at(f, offset, size):
    """Read *size* bytes from *f* starting at absolute byte *offset*."""
    f.seek(offset, 0)  # whence=0: position from start of file/device
    return f.read(size)
def parse_superblock(data):
    """Extract the geometry fields this scanner needs from a raw superblock.

    Offsets are within the superblock structure itself (not the device).
    Returns a dict of plain ints.
    """
    layout = (
        ('inodes_count', '<I', 0),
        ('blocks_count', '<I', 4),
        ('blocks_per_group', '<I', 32),
        ('inodes_per_group', '<I', 40),
        ('inode_size', '<H', 88),
        ('magic', '<H', 56),
        ('feature_incompat', '<I', 96),
    )
    sb = {name: struct.unpack_from(fmt, data, off)[0]
          for name, fmt, off in layout}
    # Old filesystems leave s_desc_size zero; that means 32-byte descriptors.
    sb['desc_size'] = struct.unpack_from('<H', data, 254)[0] or 32
    return sb
def parse_gdt_entry(data, offset, desc_size):
    """Return the inode table's starting block from one group descriptor.

    Handles both 32-byte (classic) and 64-byte (64-bit feature) descriptors.
    """
    table_lo = struct.unpack_from('<I', data, offset + 8)[0]
    if desc_size < 64:
        return table_lo  # 32-bit descriptor: only the low word exists
    table_hi = struct.unpack_from('<I', data, offset + 40)[0]
    return (table_hi << 32) | table_lo
def parse_extent_tree(data, inode_offset):
    """Parse the inline extent tree stored in an inode's i_block area.

    Returns a list of (logical_block, physical_block, length) triples, or
    [] on bad magic or an interior (depth > 0) node, which this recovery
    pass does not follow.
    """
    hdr = inode_offset + 40  # i_block starts 40 bytes into the inode
    magic, entry_cnt, _max_entries, depth = struct.unpack_from(
        '<HHHH', data, hdr)
    if magic != 0xF30A:
        return []
    if depth != 0:
        return []
    found = []
    # At most 4 extent records fit inline (60-byte i_block minus 12-byte header).
    for idx in range(min(entry_cnt, 4)):
        rec = hdr + 12 + idx * 12
        if rec + 12 > len(data):
            break
        # Record layout: ee_block(4) + ee_len(2) + ee_start_hi(2) + ee_start_lo(4)
        logical, raw_len, hi, lo = struct.unpack_from('<IHHI', data, rec)
        physical = (hi << 32) | lo
        if physical:
            # Bit 15 of ee_len flags an unwritten extent; mask it off.
            found.append((logical, physical, raw_len & 0x7FFF))
    return found
def read_dir_entries(f, inode_data, inode_offset):
    """Return {name: (inode, file_type)} for a directory inode.

    Only the first extent's first block is read -- '.' and '..' always
    live there, which is all the orphan scan needs. Unreadable blocks
    yield an empty (or partial) result rather than raising.
    """
    found = {}
    extents = parse_extent_tree(inode_data, inode_offset)
    if not extents:
        return found
    _logical, phys, _length = extents[0]
    try:
        block = read_at(f, phys * BLOCK, BLOCK)
    except OSError:
        return found  # bad sector: best-effort, report what we have
    pos = 0
    while pos < BLOCK - 8:
        ino, rec_len, name_len, ftype = struct.unpack_from('<IHBB', block, pos)
        # rec_len < 8 or overshoot means we've walked off valid entries.
        if rec_len < 8 or pos + rec_len > BLOCK:
            break
        if ino and name_len:
            raw_name = block[pos + 8:pos + 8 + name_len]
            found[raw_name.decode('utf-8', errors='replace')] = (ino, ftype)
        pos += rec_len
    return found
def parse_inode(data, offset):
    """Decode the fields this scanner needs from a raw on-disk inode.

    Returns None when fewer than 128 bytes (the classic inode size) are
    available at *offset*.
    """
    if len(data) - offset < 128:
        return None
    # Bytes 0-23 are contiguous: i_mode, i_uid, i_size_lo, then the four
    # timestamps (atime, ctime, mtime, dtime).
    (mode, _uid, _size_lo,
     _atime, ctime, mtime, _dtime) = struct.unpack_from('<HHIIIII', data, offset)
    links = struct.unpack_from('<H', data, offset + 26)[0]
    # i_block begins at byte 40; grab only the first direct pointer.
    first_ptr = struct.unpack_from('<I', data, offset + 40)[0]
    return {
        'mode': mode,
        'type': mode & 0xF000,   # file-type bits of i_mode
        'links': links,
        'ctime': ctime,
        'mtime': mtime,
        'block0': first_ptr,     # first direct block pointer
    }
def _debug_dump_extent(f, idata, ino_off, abs_inum, grp):
    """Dump a directory inode's raw extent header and probe its first block.

    Debug aid used for the first few directories found: prints the inline
    extent header, the first extent record, and the start of the data
    block it points at, interpreted as a directory entry.
    """
    base = ino_off + 40
    raw = idata[base:base + 24]
    magic, entries_cnt, _max_e, depth = struct.unpack_from('<HHHH', raw, 0)
    print(f"\nDEBUG inode {abs_inum} grp={grp}:")
    print(f" raw bytes: {raw.hex()}")
    print(f" extent header: magic={magic:#06x} entries={entries_cnt} depth={depth}")
    if len(raw) >= 24:
        # BUGFIX: the extent record is ee_block(4) + ee_len(2) +
        # ee_start_hi(2) + ee_start_lo(4). The old '<IIHH' format fused
        # ee_len with ee_start_hi and split ee_start_lo across two shorts,
        # so the debug phys/len values were garbage. '<IHHI' matches the
        # (correct) layout used by parse_extent_tree.
        l_block, ee_len, start_hi, start_lo = struct.unpack_from('<IHHI', raw, 12)
        phys = (start_hi << 32) | start_lo
        len_blks = ee_len & 0x7FFF  # mask the unwritten-extent flag bit
        print(f" first extent: l_block={l_block} phys={phys} len={len_blks}")
        if phys > 0:
            try:
                ddata = read_at(f, phys * BLOCK, 32)
                print(f" block {phys} first 32 bytes: {ddata.hex()}")
                ino2, rec2, nlen2, _ft2 = struct.unpack_from('<IHBB', ddata, 0)
                print(f" as dir entry: inode={ino2} rec_len={rec2} name_len={nlen2}")
            except OSError as e:
                print(f" read error: {e}")


def main():
    """Scan inode tables of groups 13+ and report orphaned directory roots.

    Reads geometry from the backup superblock/GDT in group 1 (primary
    copies assumed damaged), finds live directory inodes, reads their
    '.' / '..' entries, and classifies directories whose parent is
    missing, zeroed, or self-referential.
    """
    with open(DEV, 'rb') as f:
        # Backup superblocks start at the beginning of their block; only
        # the primary copy in block 0 sits at the +1024 byte offset.
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = parse_superblock(sb_data)
        INCOMPAT_EXTENTS = 0x40
        uses_extents = sb['feature_incompat'] & INCOMPAT_EXTENTS
        print(f"Extent trees: {'yes' if uses_extents else 'no'}")
        if sb['magic'] != 0xEF53:
            # BUGFIX: was an assert, which vanishes under `python -O`;
            # validation of on-disk data must always run.
            raise SystemExit(f"Bad SB magic: {sb['magic']:#x}")
        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{sb['inodes_per_group']} ino/grp, "
              f"inode_size={sb['inode_size']}, "
              f"desc_size={sb['desc_size']}")
        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        print(f"Total groups: {num_groups}, scanning from group 13+")
        # Backup GDT occupies the block(s) right after the backup SB.
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * sb['desc_size'])
        dir_parents = {}   # dir inode -> parent inode (from its '..' entry)
        all_dirs = set()   # every live directory inode seen
        for grp in range(13, num_groups):  # groups 0-12: the zeroed region
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * sb['desc_size'], sb['desc_size'])
            if inode_table_block == 0:
                continue
            inodes_per_block = BLOCK // sb['inode_size']
            num_inode_blocks = (sb['inodes_per_group'] * sb['inode_size']
                                + BLOCK - 1) // BLOCK
            for blk_off in range(num_inode_blocks):
                try:
                    idata = read_at(f,
                                    (inode_table_block + blk_off) * BLOCK,
                                    BLOCK)
                except OSError:
                    continue  # unreadable sector: skip this table block
                for slot in range(inodes_per_block):
                    ino_off = slot * sb['inode_size']
                    ino = parse_inode(idata, ino_off)
                    if ino is None:
                        continue
                    if ino['type'] != 0x4000:  # keep only S_IFDIR inodes
                        continue
                    if ino['links'] == 0:      # deleted inode
                        continue
                    abs_inum = (grp * sb['inodes_per_group']
                                + blk_off * inodes_per_block
                                + slot + 1)    # inode numbers are 1-based
                    all_dirs.add(abs_inum)
                    # Raw extent dump for the first few dirs found.
                    if len(all_dirs) <= 5:
                        _debug_dump_extent(f, idata, ino_off, abs_inum, grp)
                    entries = read_dir_entries(f, idata, ino_off)
                    dot = entries.get('.', (None,))[0]
                    dotdot = entries.get('..', (None,))[0]
                    # Trust '..' only when '.' round-trips to this inode.
                    if dot == abs_inum and dotdot is not None:
                        dir_parents[abs_inum] = dotdot
            if grp % 100 == 0:
                print(f" scanned group {grp}/{num_groups}, "
                      f"dirs so far: {len(all_dirs)}",
                      end='\r', flush=True)
        print(f"\nTotal dirs found: {len(all_dirs)}")
        print(f"Dirs with readable . and ..: {len(dir_parents)}")

    # BUGFIX: was hard-coded 13 * 8192; derive the zeroed-region boundary
    # from the actual geometry so it is correct for any inodes-per-group.
    FIRST_GOOD_INODE = 13 * sb['inodes_per_group']  # first inode in group 13
    orphan_roots = []
    for inum, parent in dir_parents.items():
        if parent == inum:
            orphan_roots.append((inum, parent, 'self-referential'))
        elif parent < FIRST_GOOD_INODE:
            # Parent lives in the zeroed region: a detached root.
            orphan_roots.append((inum, parent, 'parent-in-zeroed-region'))
        elif parent not in all_dirs:
            orphan_roots.append((inum, parent, 'parent-missing'))
    print(f"\nOrphaned roots: {len(orphan_roots)}")
    print(f"{'inode':>12} {'parent':>12} reason")
    print('-' * 45)
    for inum, parent, reason in sorted(orphan_roots):
        print(f"{inum:>12} {parent:>12} {reason}")

    # True roots: orphans whose own parent is not itself an orphan --
    # these are the tops of detached subtrees.
    orphan_inums = {inum for inum, parent, reason in orphan_roots}
    true_roots = [(inum, parent, reason)
                  for inum, parent, reason in orphan_roots
                  if parent not in orphan_inums]
    print(f"\nTrue detached tree roots: {len(true_roots)}")
    print(f"{'inode':>12} {'parent':>12} reason")
    print('-' * 55)
    # BUGFIX(perf): open the device once here, not once per true root.
    with open(DEV, 'rb') as f:
        for inum, parent, reason in sorted(true_roots):
            # Locate this inode's slot via the backup GDT and list a few
            # child names to help identify the detached tree.
            grp = (inum - 1) // sb['inodes_per_group']
            local_idx = (inum - 1) % sb['inodes_per_group']
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * sb['desc_size'], sb['desc_size'])
            blk_off = (local_idx * sb['inode_size']) // BLOCK
            slot = (local_idx * sb['inode_size']) % BLOCK
            idata = read_at(f, (inode_table_block + blk_off) * BLOCK, BLOCK)
            entries = read_dir_entries(f, idata, slot)
            names = [k for k in entries if k not in ('.', '..')][:5]
            print(f"{inum:>12} {parent:>12} {names}")
# Run the scan only when executed as a script, not on import.
if __name__ == '__main__':
    main()