243 lines
10 KiB
Python
243 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Scan ext4 filesystem for orphaned directory roots.
|
|
Reads inode table directly using geometry from backup superblock.
|
|
"""
|
|
import struct, sys
|
|
from collections import defaultdict
|
|
|
|
# Device node that main() opens read-only and scans.
DEV = "/dev/dm-0"

# Size of one filesystem block in bytes; block numbers are multiplied by this
# to obtain byte offsets on the device.
BLOCK = 1 << 12

# Block number from which main() reads the backup superblock.
BACKUP_SB_BLOCK = 1 << 15
|
|
|
|
def read_at(f, offset, size):
    """Seek *f* to byte *offset* and return the result of reading *size* bytes.

    Args:
        f: Seekable binary stream.
        offset: Absolute byte position to seek to.
        size: Number of bytes requested from ``f.read``.

    Returns:
        Whatever ``f.read(size)`` yields at that position.
    """
    f.seek(offset)
    payload = f.read(size)
    return payload
|
|
|
|
def parse_superblock(data):
    """Decode the ext4 superblock fields this script relies on.

    Args:
        data: Raw superblock bytes. At least 256 bytes are required; 340 or
            more are needed for the high half of the block count to be read.

    Returns:
        dict with the keys ``inodes_count``, ``blocks_count``,
        ``blocks_per_group``, ``inodes_per_group``, ``inode_size``, ``magic``,
        ``feature_incompat`` and ``desc_size``.

    Raises:
        struct.error: if *data* is too short for one of the fixed fields.
    """
    INCOMPAT_64BIT = 0x80

    def field(fmt, pos):
        return struct.unpack_from(fmt, data, pos)[0]

    sb = {}
    sb['inodes_count'] = field('<I', 0)
    sb['blocks_count'] = field('<I', 4)
    sb['blocks_per_group'] = field('<I', 32)
    sb['inodes_per_group'] = field('<I', 40)
    sb['inode_size'] = field('<H', 88)
    sb['magic'] = field('<H', 56)
    sb['feature_incompat'] = field('<I', 96)

    is_64bit = bool(sb['feature_incompat'] & INCOMPAT_64BIT)

    # On 64-bit filesystems the block count is split in two; ignoring the
    # high word truncates the count (and the derived group count) on
    # volumes with more than 2**32 blocks.
    if is_64bit and len(data) >= 340:
        sb['blocks_count'] |= field('<I', 336) << 32

    # s_desc_size is only meaningful when the 64BIT feature is set; without
    # it group descriptors are always 32 bytes, whatever the field holds.
    raw_desc_size = field('<H', 254)
    sb['desc_size'] = raw_desc_size if (is_64bit and raw_desc_size) else 32
    return sb
|
|
|
|
def parse_gdt_entry(data, offset, desc_size):
    """Return the inode-table block number stored in one group descriptor.

    Args:
        data: Buffer holding the group descriptor table.
        offset: Byte offset of the descriptor inside *data*.
        desc_size: Descriptor size in bytes; 64 or more means the descriptor
            also carries the upper 32 bits of the block number.

    Returns:
        The inode-table block number as an int.
    """
    (table_block,) = struct.unpack_from('<I', data, offset + 8)
    if desc_size < 64:
        # 32-byte descriptor: only the low word exists.
        return table_block
    (upper_word,) = struct.unpack_from('<I', data, offset + 40)
    return (upper_word << 32) | table_block
|
|
|
|
def parse_extent_tree(data, inode_offset):
    """Decode the leaf extents stored inline in an inode's i_block area.

    Only a depth-0 tree (extents held directly in the inode) is decoded.
    Trees with index nodes are not followed, because that would require
    reading further blocks from disk.

    Args:
        data: Buffer containing the raw inode.
        inode_offset: Byte offset of the inode inside *data*.

    Returns:
        List of ``(logical_block, physical_block, length)`` tuples. The list
        is empty when the extent magic is absent, the tree depth is non-zero,
        or the buffer is too short to hold the extent header.
    """
    EXTENT_MAGIC = 0xF30A
    HEADER_SIZE = 12          # magic, entries, max, depth, generation
    ENTRY_SIZE = 12           # ee_block(4) ee_len(2) start_hi(2) start_lo(4)
    MAX_INLINE_ENTRIES = 4    # 60-byte i_block = header + 4 entries
    MAX_INIT_LEN = 32768      # larger ee_len values mark unwritten extents

    base = inode_offset + 40  # i_block starts 40 bytes into the inode
    if base + 8 > len(data):
        # Not enough bytes for the header fields; treat as "no extents".
        return []

    magic, entries, _max_entries, depth = struct.unpack_from('<HHHH', data, base)
    if magic != EXTENT_MAGIC:
        return []

    extents = []
    if depth != 0:
        return extents

    for i in range(min(entries, MAX_INLINE_ENTRIES)):
        ext_off = base + HEADER_SIZE + i * ENTRY_SIZE
        if ext_off + ENTRY_SIZE > len(data):
            break
        l_block, ee_len, start_hi, start_lo = struct.unpack_from(
            '<IHHI', data, ext_off)
        phys = (start_hi << 32) | start_lo
        if phys == 0:
            continue
        # ee_len <= 32768 is an initialised extent of exactly that length;
        # above that the real length is ee_len - 32768. Masking with 0x7FFF
        # would wrongly turn a full 32768-block extent into length 0.
        length = ee_len if ee_len <= MAX_INIT_LEN else ee_len - MAX_INIT_LEN
        extents.append((l_block, phys, length))
    return extents
|
|
|
|
def read_dir_entries(f, inode_data, inode_offset):
    """Read the entries in the first data block of a directory inode.

    Only the first extent's first block is read, which is enough to find
    the ``.`` and ``..`` entries.

    Args:
        f: Open binary stream for the device.
        inode_data: Buffer containing the raw inode.
        inode_offset: Byte offset of the inode inside *inode_data*.

    Returns:
        dict mapping entry name to ``(inode_number, file_type)``. The dict is
        empty when the inode has no usable inline extent or the block cannot
        be read.
    """
    entries = {}
    extents = parse_extent_tree(inode_data, inode_offset)
    if not extents:
        return entries

    _, phys_block, _ = extents[0]
    try:
        data = read_at(f, phys_block * BLOCK, BLOCK)
    except OSError:
        return entries

    # A stale extent can point past the end of the device, giving a short or
    # empty read. Bound parsing by what was actually read so that such an
    # inode is skipped instead of raising struct.error and aborting the scan.
    limit = min(len(data), BLOCK)
    offset = 0
    while offset < limit - 8:
        ino, rec_len, name_len, ftype = struct.unpack_from('<IHBB', data, offset)
        if rec_len < 8 or offset + rec_len > limit:
            break
        if 8 + name_len > rec_len:
            # The name would overrun its own record: corrupt entry, stop.
            break
        if ino != 0 and name_len > 0:
            raw_name = data[offset + 8:offset + 8 + name_len]
            name = raw_name.decode('utf-8', errors='replace')
            entries[name] = (ino, ftype)
        offset += rec_len
    return entries
|
|
|
|
def parse_inode(data, offset):
    """Pull the fields this script needs out of one raw on-disk inode.

    Args:
        data: Buffer holding one or more raw inodes.
        offset: Byte offset of the inode inside *data*.

    Returns:
        ``None`` when fewer than 128 bytes are available from *offset*.
        Otherwise a dict with ``mode``, ``type`` (``mode & 0xF000``),
        ``links``, ``ctime``, ``mtime`` and ``block0`` (the 32-bit word at
        inode offset 40, where the block map begins).
    """
    available = len(data) - offset
    if available < 128:
        return None

    # One pass over the fixed 28-byte prefix: mode, uid, size_lo, atime,
    # ctime, mtime, dtime, gid, links_count. Fields not returned are
    # discarded.
    (mode, _uid, _size_lo, _atime, ctime, mtime,
     _dtime, _gid, links_count) = struct.unpack_from('<HHIIIIIHH', data, offset)
    (first_word,) = struct.unpack_from('<I', data, offset + 40)

    return {
        'mode': mode,
        'type': mode & 0xF000,
        'links': links_count,
        'ctime': ctime,
        'mtime': mtime,
        'block0': first_word,
    }
|
|
|
|
def _read_inode(f, sb, gdt_data, inum):
    """Load the inode-table block holding inode *inum*.

    Returns ``(block_bytes, byte_offset_of_inode_in_block)``. Raises OSError
    when the group has no inode table or the block cannot be read in full.
    """
    grp, idx_in_grp = divmod(inum - 1, sb['inodes_per_group'])  # 1-based numbers
    table_block = parse_gdt_entry(
        gdt_data, grp * sb['desc_size'], sb['desc_size'])
    if table_block == 0:
        raise OSError(f"group {grp} has no inode table")
    blk_off, slot_off = divmod(idx_in_grp * sb['inode_size'], BLOCK)
    idata = read_at(f, (table_block + blk_off) * BLOCK, BLOCK)
    if len(idata) < slot_off + 128:
        raise OSError(f"short read of inode table block for inode {inum}")
    return idata, slot_off


def _classify_inode(idata, slot):
    """Return a short status label for the inode at byte offset *slot*.

    NOTE(review): the original script called a ``classify_inode`` helper that
    is not defined in this file; this is a minimal stand-in. Confirm that the
    labels match what was intended.
    """
    info = parse_inode(idata, slot)
    if info is None:
        return 'unreadable'
    if struct.unpack_from('<I', idata, slot + 20)[0]:  # deletion time set
        return 'deleted'
    if info['links'] == 0:
        return 'unlinked'
    if info['type'] != 0x4000:
        return 'not-dir'
    return 'in-use'


def _dump_extent_debug(f, idata, ino_off, abs_inum, grp):
    """Print the raw extent header and first extent of one directory inode."""
    base = ino_off + 40
    raw = idata[base:base + 24]
    magic, entries_cnt, _max_e, depth = struct.unpack_from('<HHHH', raw, 0)
    print(f"\nDEBUG inode {abs_inum} grp={grp}:")
    print(f" raw bytes: {raw.hex()}")
    print(f" extent header: magic={magic:#06x} entries={entries_cnt} depth={depth}")
    if len(raw) < 24:
        return
    # Same layout as parse_extent_tree: block(4) len(2) start_hi(2) start_lo(4).
    # Decoding this as '<IIHH' reports, and then reads, the wrong block.
    l_block, len_blks, start_hi, start_lo = struct.unpack_from('<IHHI', raw, 12)
    phys = (start_hi << 32) | start_lo
    print(f" first extent: l_block={l_block} phys={phys} len={len_blks}")
    if phys == 0:
        return
    try:
        ddata = read_at(f, phys * BLOCK, 32)
    except OSError as e:
        print(f" read error: {e}")
        return
    print(f" block {phys} first 32 bytes: {ddata.hex()}")
    if len(ddata) >= 8:
        # Show how the block start decodes as a directory entry.
        ino2, rec2, nlen2, _ft2 = struct.unpack_from('<IHBB', ddata, 0)
        print(f" as dir entry: inode={ino2} rec_len={rec2} name_len={nlen2}")


def main(first_group=13):
    """Scan the inode tables of DEV and report orphaned directory roots.

    Geometry comes from the backup superblock and the group descriptor table
    that follows it. Every in-use directory inode from *first_group* onwards
    is collected together with the parent named by its ``..`` entry.
    Directories whose parent is themselves, lies before *first_group* (the
    zeroed region), or was not found are reported. Only the topmost such
    directories are printed.

    Args:
        first_group: First block group to scan; earlier groups are treated
            as the zeroed region.

    Exits via ``sys.exit`` with a message when the backup superblock is
    unreadable, has a bad magic number or implausible geometry.
    """
    import datetime

    INCOMPAT_EXTENTS = 0x40
    S_IFDIR = 0x4000

    with open(DEV, 'rb') as f:
        # Read backup superblock (no +1024 offset for backup blocks)
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        if len(sb_data) < 1024:
            sys.exit(f"Short read of backup superblock at block {BACKUP_SB_BLOCK}")
        sb = parse_superblock(sb_data)

        # Explicit check rather than assert, which is stripped under -O.
        if sb['magic'] != 0xef53:
            sys.exit(f"Bad SB magic: {sb['magic']:#x}")

        inode_size = sb['inode_size']
        ipg = sb['inodes_per_group']
        desc_size = sb['desc_size']
        if sb['blocks_per_group'] == 0 or ipg == 0 \
                or not 128 <= inode_size <= BLOCK:
            sys.exit(f"Implausible geometry in backup superblock: "
                     f"{sb['blocks_per_group']} blk/grp, {ipg} ino/grp, "
                     f"inode_size={inode_size}")

        uses_extents = sb['feature_incompat'] & INCOMPAT_EXTENTS
        print(f"Extent trees: {'yes' if uses_extents else 'no'}")
        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{ipg} ino/grp, "
              f"inode_size={inode_size}, "
              f"desc_size={desc_size}")

        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        print(f"Total groups: {num_groups}, scanning from group {first_group}+")

        # Read GDT from backup location (block after backup SB)
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * desc_size)
        usable_groups = min(num_groups, len(gdt_data) // desc_size)
        if usable_groups < num_groups:
            print(f"Warning: descriptor table truncated, only "
                  f"{usable_groups} of {num_groups} groups available")

        dir_parents = {}  # directory inode -> inode named by its '..'
        all_dirs = set()

        # Geometry is the same for every group, so compute it once.
        inodes_per_block = BLOCK // inode_size
        num_inode_blocks = (ipg * inode_size + BLOCK - 1) // BLOCK

        for grp in range(first_group, usable_groups):
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * desc_size, desc_size)
            if inode_table_block == 0:
                continue

            for blk_off in range(num_inode_blocks):
                try:
                    idata = read_at(
                        f, (inode_table_block + blk_off) * BLOCK, BLOCK)
                except OSError:
                    continue

                for slot in range(inodes_per_block):
                    idx_in_grp = blk_off * inodes_per_block + slot
                    if idx_in_grp >= ipg:
                        break  # padding after the group's last inode
                    ino_off = slot * inode_size
                    info = parse_inode(idata, ino_off)
                    if info is None:
                        continue
                    if info['type'] != S_IFDIR or info['links'] == 0:
                        continue

                    abs_inum = grp * ipg + idx_in_grp + 1
                    all_dirs.add(abs_inum)

                    # Dump the raw extent data of the first 5 dirs found.
                    if len(all_dirs) <= 5:
                        _dump_extent_debug(f, idata, ino_off, abs_inum, grp)

                    entries = read_dir_entries(f, idata, ino_off)
                    dot = entries.get('.', (None,))[0]
                    dotdot = entries.get('..', (None,))[0]
                    if dot == abs_inum and dotdot is not None:
                        dir_parents[abs_inum] = dotdot

            if grp % 100 == 0:
                print(f" scanned group {grp}/{num_groups}, "
                      f"dirs so far: {len(all_dirs)}",
                      end='\r', flush=True)

        print(f"\nTotal dirs found: {len(all_dirs)}")
        print(f"Dirs with readable . and ..: {len(dir_parents)}")

        # Inode numbers are 1-based, so group g starts at g * ipg + 1. Use the
        # real inodes-per-group value rather than an assumed 8192.
        FIRST_GOOD_INODE = first_group * ipg + 1

        orphan_roots = []
        for inum, parent in dir_parents.items():
            if parent == inum:
                orphan_roots.append((inum, parent, 'self-referential'))
            elif parent < FIRST_GOOD_INODE:
                # parent is in zeroed region - this is a detached root
                orphan_roots.append((inum, parent, 'parent-in-zeroed-region'))
            elif parent not in all_dirs:
                orphan_roots.append((inum, parent, 'parent-missing'))

        # True roots: orphans whose parent is not itself an orphan
        orphan_inums = {inum for inum, _parent, _reason in orphan_roots}
        true_roots = [(inum, parent, reason)
                      for inum, parent, reason in orphan_roots
                      if parent not in orphan_inums]

        print(f"\nOrphaned roots: {len(true_roots)}")
        # The dtime column is 19 wide to match the formatted timestamps below.
        print(f"{'inode':>12} {'parent':>12} {'status':>12} {'dtime':>19} reason")
        print('-' * 75)

        for inum, parent, reason in sorted(true_roots):
            try:
                idata, slot = _read_inode(f, sb, gdt_data, inum)
                status = _classify_inode(idata, slot)
                dtime = struct.unpack_from('<I', idata, slot + 20)[0]
                if dtime:
                    dt = datetime.datetime.fromtimestamp(dtime).strftime(
                        '%Y-%m-%d %H:%M:%S')
                else:
                    dt = 'never'
            except (OSError, struct.error, ValueError, OverflowError):
                # Only I/O and decoding failures are expected here; anything
                # else is a programming error and should surface.
                status, dt = 'unreadable', 'unknown'

            print(f"{inum:>12} {parent:>12} {status:>12} {dt:>19} {reason}")
|
|
|
|
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|