#!/usr/bin/env python3
"""
Scan ext4 filesystem for orphaned directory roots.
Reads inode table directly using geometry from backup superblock.

NOTE(review): this file was recovered from a damaged copy in which every
``<...>`` span (including all ``struct`` format strings) had been stripped.
Sections that had to be reconstructed from the documented ext4 on-disk layout
rather than from surviving text are marked ``NOTE(review)`` below and should
be confirmed against the original before the output is trusted.
"""
import struct
import sys
from collections import defaultdict

DEV = '/dev/dm-0'
BLOCK = 4096
BACKUP_SB_BLOCK = 32768

# First block group whose inode table is scanned.  The surviving code iterated
# ``range(13, num_groups)``; the value is kept as-is.
# NOTE(review): presumably groups 0-12 are known to be unusable - confirm.
START_GROUP = 13

EXT4_MAGIC = 0xEF53
EXT4_FEATURE_INCOMPAT_64BIT = 0x80
EXTENT_MAGIC = 0xF30A
EXTENT_INIT_MAX_LEN = 32768   # ee_len above this marks an unwritten extent
I_BLOCK_OFFSET = 40           # offset of i_block (extent tree root) in an inode
EXTENT_HEADER_SIZE = 12
EXTENT_ENTRY_SIZE = 12
EXTENTS_PER_INODE = 4         # (60-byte i_block - 12-byte header) // 12
S_IFMT = 0xF000
S_IFDIR = 0x4000


def read_at(f, offset, size):
    """Seek *f* to byte *offset* and return up to *size* bytes read from it."""
    f.seek(offset)
    return f.read(size)


def parse_superblock(data):
    """Decode the superblock fields this script needs from raw bytes.

    Only the ``inodes_count`` line survived in the damaged source; the other
    fields use the offsets from the ext4 on-disk layout documentation.
    NOTE(review): confirm the key set matches the original (``main`` reads
    ``desc_size``, ``inode_size`` and ``inodes_per_group``).

    Returns a dict of integers.  ``desc_size`` falls back to 32 when the
    64bit feature is not set, because ``s_desc_size`` is only meaningful
    with that feature.
    """
    sb = {}
    sb['inodes_count'] = struct.unpack_from('<I', data, 0x00)[0]
    blocks_lo = struct.unpack_from('<I', data, 0x04)[0]
    sb['first_data_block'] = struct.unpack_from('<I', data, 0x14)[0]
    sb['blocks_per_group'] = struct.unpack_from('<I', data, 0x20)[0]
    sb['inodes_per_group'] = struct.unpack_from('<I', data, 0x28)[0]
    sb['magic'] = struct.unpack_from('<H', data, 0x38)[0]
    sb['inode_size'] = struct.unpack_from('<H', data, 0x58)[0]
    sb['feature_incompat'] = struct.unpack_from('<I', data, 0x60)[0]

    is_64bit = bool(sb['feature_incompat'] & EXT4_FEATURE_INCOMPAT_64BIT)
    raw_desc_size = struct.unpack_from('<H', data, 0xFE)[0]
    blocks_hi = struct.unpack_from('<I', data, 0x150)[0] if is_64bit else 0

    sb['desc_size'] = raw_desc_size if is_64bit and raw_desc_size >= 32 else 32
    sb['blocks_count'] = (blocks_hi << 32) | blocks_lo
    return sb


def parse_gdt_entry(data, offset, desc_size):
    """Return the inode-table start block from the descriptor at *offset*.

    The low 32 bits live at descriptor offset 0x08; the high 32 bits at 0x28
    exist only when descriptors are at least 64 bytes.
    """
    inode_table_lo = struct.unpack_from('<I', data, offset + 0x08)[0]
    inode_table_hi = 0
    if desc_size >= 64:
        inode_table_hi = struct.unpack_from('<I', data, offset + 0x28)[0]
    return (inode_table_hi << 32) | inode_table_lo


def parse_extent_tree(data, inode_offset):
    """Return the leaf extents stored directly in an inode.

    *data* is a buffer containing the inode, *inode_offset* its byte offset.
    Returns a list of ``(logical_block, physical_block, length)`` tuples.
    An empty list is returned when the extent magic is absent or the root is
    an index node (depth > 0), since following index nodes needs disk reads
    this function has no file handle for.
    NOTE(review): whether the original also rejected depth > 0 was lost.
    """
    base = inode_offset + I_BLOCK_OFFSET
    if base + EXTENT_HEADER_SIZE > len(data):
        return []
    magic, entries, _max_entries, depth = struct.unpack_from('<HHHH', data, base)
    if magic != EXTENT_MAGIC or depth != 0:
        return []

    extents = []
    for i in range(min(entries, EXTENTS_PER_INODE)):
        off = base + EXTENT_HEADER_SIZE + i * EXTENT_ENTRY_SIZE
        if off + EXTENT_ENTRY_SIZE > len(data):
            break
        # Correct layout: l_block(4) + ee_len(2) + ee_start_hi(2) + ee_start_lo(4)
        l_block, ee_len, start_hi, start_lo = struct.unpack_from('<IHHI', data, off)
        phys = (start_hi << 32) | start_lo
        if phys > 0:
            # ee_len == 32768 is a valid initialized extent; only values above
            # it carry the "unwritten" bias.  Masking with 0x7FFF would turn a
            # full-size extent into length 0.
            length = ee_len - EXTENT_INIT_MAX_LEN if ee_len > EXTENT_INIT_MAX_LEN else ee_len
            extents.append((l_block, phys, length))
    return extents


def read_dir_entries(f, inode_data, inode_offset):
    """Read directory entries using extent tree from inode data.

    Only the first extent's first block is read, which is enough for ``.``
    and ``..``.  Returns ``{name: (inode, file_type)}``; unreadable or
    truncated blocks yield whatever was parsed so far (possibly nothing).
    """
    entries = {}
    extents = parse_extent_tree(inode_data, inode_offset)
    for _, phys_block, _length in extents[:1]:
        try:
            data = read_at(f, phys_block * BLOCK, BLOCK)
        except OSError:
            continue  # best effort: an unreadable block has no entries
        limit = len(data)  # may be short at end of device
        offset = 0
        while offset < limit - 8:
            ino, rec_len, name_len, ftype = struct.unpack_from('<IHBB', data, offset)
            if rec_len < 8 or offset + rec_len > limit:
                break  # corrupt record length; stop rather than loop forever
            if ino != 0 and name_len > 0:
                name = data[offset + 8:offset + 8 + name_len].decode(
                    'utf-8', errors='replace')
                entries[name] = (ino, ftype)
            offset += rec_len
    return entries


def parse_inode(data, offset):
    """Decode the inode at *offset* in *data*, or return None if truncated.

    NOTE(review): only the length check and the first unpack survived; the
    returned keys are reconstructed from the ext4 inode layout.  ``main``
    relies on ``type`` (mode & 0xF000) and ``links`` (i_links_count).
    """
    if len(data) - offset < 128:
        return None
    mode, uid, size_lo = struct.unpack_from('<HHI', data, offset)
    links = struct.unpack_from('<H', data, offset + 0x1A)[0]
    flags = struct.unpack_from('<I', data, offset + 0x20)[0]
    return {
        'mode': mode,
        'type': mode & S_IFMT,
        'uid': uid,
        'size': size_lo,
        'links': links,
        'flags': flags,
    }


def _debug_dump_extent(f, abs_inum, idata, ino_off):
    """Print the raw extent header and first data bytes of one directory.

    NOTE(review): apart from the "first 32 bytes" line, the print texts here
    are reconstructed; the originals were lost.
    """
    base = ino_off + I_BLOCK_OFFSET
    raw = idata[base:base + 24]
    if len(raw) < 8:
        return
    magic, entries_cnt, max_e, depth = struct.unpack_from('<HHHH', raw, 0)
    print(f"inode {abs_inum}: magic={magic:#06x} entries={entries_cnt} "
          f"max={max_e} depth={depth}")
    if len(raw) >= 24:
        l_block, len_blks, start_hi, start_lo = struct.unpack_from('<IHHI', raw, 12)
        phys = (start_hi << 32) | start_lo
        print(f" extent 0: l_block={l_block} len={len_blks} phys={phys}")
        if phys > 0:
            try:
                ddata = read_at(f, phys * BLOCK, 32)
            except OSError as exc:
                print(f" block {phys} unreadable: {exc}")
                return
            print(f" block {phys} first 32 bytes: {ddata.hex()}")
            # Check if it looks like a dir entry
            if len(ddata) >= 8:
                ino2, rec2, nlen2, ft2 = struct.unpack_from('<IHBB', ddata, 0)
                print(f" first dirent: ino={ino2} rec_len={rec2} "
                      f"name_len={nlen2} ftype={ft2}")


def _scan_directories(f, sb, gdt_data, num_groups):
    """Walk the inode tables and return ``(all_dirs, dir_parents)``.

    ``all_dirs`` is the set of inode numbers of linked directories;
    ``dir_parents`` maps a directory inode to the inode its ``..`` names.
    """
    dir_parents = {}  # inode -> parent_inode
    all_dirs = set()
    inode_size = sb['inode_size']
    inodes_per_group = sb['inodes_per_group']
    inodes_per_block = BLOCK // inode_size
    num_inode_blocks = (inodes_per_group * inode_size + BLOCK - 1) // BLOCK

    for grp in range(START_GROUP, num_groups):
        inode_table_block = parse_gdt_entry(
            gdt_data, grp * sb['desc_size'], sb['desc_size'])
        if inode_table_block == 0:
            continue
        for blk_off in range(num_inode_blocks):
            try:
                idata = read_at(f, (inode_table_block + blk_off) * BLOCK, BLOCK)
            except OSError as exc:
                print(f"warning: group {grp} inode block {blk_off} "
                      f"unreadable: {exc}", file=sys.stderr)
                continue
            for slot in range(inodes_per_block):
                local_idx = blk_off * inodes_per_block + slot
                if local_idx >= inodes_per_group:
                    break  # padding past the end of this group's table
                ino_off = slot * inode_size
                ino = parse_inode(idata, ino_off)
                if ino is None:
                    continue
                if ino['type'] != S_IFDIR:
                    continue
                if ino['links'] == 0:
                    continue
                abs_inum = grp * inodes_per_group + local_idx + 1
                all_dirs.add(abs_inum)

                # Just for the first 5 dirs found, dump raw extent header
                if len(all_dirs) <= 5:
                    _debug_dump_extent(f, abs_inum, idata, ino_off)

                # NOTE(review): reconstructed - record the '..' target of
                # every directory whose first block has both '.' and '..'.
                entries = read_dir_entries(f, idata, ino_off)
                if '.' in entries and '..' in entries:
                    dir_parents[abs_inum] = entries['..'][0]
    return all_dirs, dir_parents


def _read_root_entries(f, sb, gdt_data, inum):
    """Return the directory entries of inode *inum* (empty on any failure)."""
    grp = (inum - 1) // sb['inodes_per_group']
    local_idx = (inum - 1) % sb['inodes_per_group']
    inode_table_block = parse_gdt_entry(
        gdt_data, grp * sb['desc_size'], sb['desc_size'])
    if inode_table_block == 0:
        return {}
    blk_off = (local_idx * sb['inode_size']) // BLOCK
    slot = (local_idx * sb['inode_size']) % BLOCK  # byte offset inside block
    try:
        idata = read_at(f, (inode_table_block + blk_off) * BLOCK, BLOCK)
    except OSError:
        return {}
    return read_dir_entries(f, idata, slot)


def main():
    """Scan DEV and print orphaned directories and detached tree roots."""
    with open(DEV, 'rb') as f:
        # NOTE(review): the start of main() was lost.  Reconstructed as: read
        # the backup superblock at BACKUP_SB_BLOCK and the backup group
        # descriptors in the block right after it - confirm.
        sb = parse_superblock(read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024))
        if sb['magic'] != EXT4_MAGIC or not sb['blocks_per_group'] \
                or not sb['inode_size'] or not sb['inodes_per_group']:
            sys.exit(f"no valid ext4 superblock at block {BACKUP_SB_BLOCK}")
        data_blocks = sb['blocks_count'] - sb['first_data_block']
        num_groups = -(-data_blocks // sb['blocks_per_group'])  # ceil division
        gdt_size = num_groups * sb['desc_size']
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK, gdt_size)
        if len(gdt_data) < gdt_size:
            sys.exit("short read on group descriptor table")

        # We collect (dot_inode, dotdot_inode) for every dir we find
        all_dirs, dir_parents = _scan_directories(f, sb, gdt_data, num_groups)

        # NOTE(review): the classification rule and reason strings were lost.
        # Reconstructed as: a directory is orphaned when its '..' does not
        # name a directory found by the scan (the root, whose '..' is itself,
        # is skipped).  The surviving output code requires (int, int, str).
        orphan_roots = []
        for inum, parent in dir_parents.items():
            if parent == inum or parent in all_dirs:
                continue
            parent_grp = (parent - 1) // sb['inodes_per_group'] if parent else -1
            if 0 <= parent_grp < START_GROUP:
                reason = 'parent in unscanned group'
            else:
                reason = 'parent not a directory'
            orphan_roots.append((inum, parent, reason))

        print(f"\nOrphaned directories: {len(orphan_roots)}")
        print(f"{'inode':>12} {'parent':>12} reason")
        print('-' * 45)
        for inum, parent, reason in sorted(orphan_roots):
            print(f"{inum:>12} {parent:>12} {reason}")

        # Build set of all orphaned inodes
        orphan_inums = {inum for inum, parent, reason in orphan_roots}

        # True roots: orphans whose parent is not itself an orphan
        true_roots = [(inum, parent, reason)
                      for inum, parent, reason in orphan_roots
                      if parent not in orphan_inums]

        print(f"\nTrue detached tree roots: {len(true_roots)}")
        print(f"{'inode':>12} {'parent':>12} reason")
        print('-' * 55)
        for inum, parent, reason in sorted(true_roots):
            # Try to get first few dir entries to identify the tree
            entries = _read_root_entries(f, sb, gdt_data, inum)
            # Show entries excluding . and ..
            names = [k for k in entries if k not in ('.', '..')][:5]
            print(f"{inum:>12} {parent:>12} {names}")


if __name__ == '__main__':
    main()