Files
ext4recovery/test/scan_inodes_for_specific_names_raw.py
2026-04-30 11:04:05 +00:00

151 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
Scan inode tables directly for directory inodes,
then read their data blocks looking for target names.
Much faster than full disk scan.
"""
import struct, os
# --- Address translation (consumed by v_to_p / read_virt) -------------------
# Size in bytes of one slot of the virtual->physical mapping.
# Presumably 128 sectors of 512 bytes each -- TODO confirm against the array.
CHUNK = 64 * 1024
# Byte offset on the disk at which mapped data begins.
# Presumably a start sector (5,120,000) times a 512-byte sector size.
LV_START = 5_120_000 * 512
# Candidate member devices. Only '/dev/sda' is opened by the visible code.
DISKS = ['/dev/sda', '/dev/sde', '/dev/sdd', '/dev/sdc']

# --- Filesystem geometry assumed by the scan --------------------------------
BSIZE = 4096          # bytes per filesystem block
INODE_SZ = 256        # bytes per on-disk inode record
IPG = 8192            # inodes per block group
BPG = 32768           # blocks per block group (not referenced in visible code)
NUM_GROUPS = 35728    # total number of block groups
MIN_GROUP = 13        # groups 0-12 are zeroed

# --- Directory-entry names the scan looks for -------------------------------
TARGETS = [
    b'pterodactyl',
    b'var',
    b'mysql',
    b'www',
    b'log',
    b'docker',
    b'nginx',
    b'apache2',
    b'www-data',
]
def v_to_p(virt_byte):
    """Translate a virtual byte offset into a byte offset on disk 0.

    The virtual space is treated as repeating runs of five CHUNK-sized
    slots. The first four slots of every run are stored back-to-back on
    the disk, beginning at LV_START; the fifth slot has no location on
    this disk.

    Returns the physical byte offset, or None when ``virt_byte`` falls in
    the fifth slot of its run.
    """
    run_no, within_run = divmod(virt_byte, 5 * CHUNK)
    slot, intra = divmod(within_run, CHUNK)
    if slot == 4:
        # Fifth slot of the run: nothing to read from this disk.
        return None
    # Each run contributes only four chunks of physical space.
    return LV_START + run_no * 4 * CHUNK + slot * CHUNK + intra
def read_virt(f, virt_byte, length):
    """Read ``length`` bytes of the virtual address space via disk 0.

    The virtual->physical mapping implemented by ``v_to_p`` is only
    contiguous inside a single CHUNK, so the request is split at chunk
    boundaries and every piece is translated on its own. Translating just
    the first byte and reading ``length`` contiguous physical bytes would
    return shifted data as soon as the range crosses an unmapped chunk.

    Args:
        f: binary file object for the disk, supporting ``seek``/``read``.
        virt_byte: starting virtual byte offset.
        length: number of bytes wanted.

    Returns:
        Exactly ``length`` bytes (``b''`` when ``length <= 0``). Pieces that
        ``v_to_p`` reports as unmapped, and anything past the end of the
        device, are filled with zero bytes.
    """
    out = bytearray()
    pos = virt_byte
    remaining = length
    while remaining > 0:
        # Never let a single piece cross a chunk boundary.
        span = min(remaining, CHUNK - pos % CHUNK)
        phys = v_to_p(pos)
        if phys is None:
            out += bytes(span)
        else:
            f.seek(phys)
            got = 0
            # An unbuffered file may legally return fewer bytes than asked.
            while got < span:
                data = f.read(span - got)
                if not data:
                    break  # end of device (or nothing available)
                out += data
                got += len(data)
            # Keep the result length stable for callers that index into it.
            out += bytes(span - got)
        pos += span
        remaining -= span
    return bytes(out)
def is_valid_dirent(block, off, name, bsize=None):
    """Return True if ``block[off:]`` looks like a directory entry for ``name``.

    The 8-byte entry header is decoded as little-endian
    ``inode (u32), rec_len (u16), name_len (u8), file_type (u8)`` and is
    followed by the name bytes.

    Args:
        block: bytes-like directory block.
        off: byte offset of the candidate entry inside ``block``.
        name: exact name (bytes) the entry must carry.
        bsize: block size to validate against; defaults to the module-level
            BSIZE when omitted.

    Returns:
        True only when every plausibility check passes; False otherwise,
        including when the entry would not fit in ``block``.
    """
    block_size = BSIZE if bsize is None else bsize
    name_end = off + 8 + len(name)
    # Header plus name must lie inside both the logical block and the buffer.
    if off < 0 or name_end > min(block_size, len(block)):
        return False

    inode, rec_len, name_len, ftype = struct.unpack_from('<IHBB', block, off)

    # Plausibility window for the inode number used by this script.
    if not 10 < inode < 500_000_000:
        return False
    if name_len != len(name):
        return False
    # The record must hold header + name, be 4-byte sized, and must not run
    # past the end of the block when measured from its own offset.
    if rec_len < 8 + name_len or rec_len % 4 != 0 or off + rec_len > block_size:
        return False
    # Accepted type codes; the script labels them 1=file, 2=dir, 7=link.
    if ftype not in (1, 2, 7):
        return False
    if block[off + 8:name_end] != name:
        return False
    # Only inspect the byte after the name when it is padding that belongs to
    # this record. With rec_len == 8 + name_len that byte is the first byte of
    # the next entry, so testing it would reject valid entries.
    if rec_len > 8 + name_len and name_end < len(block) and block[name_end] != 0:
        return False
    return True
def parse_extents(inode_data):
    """Collect physical block numbers from an inode's inline extent list.

    The extent header is read at byte 40 of the inode record. Only a header
    carrying magic 0xf30a with depth 0 is decoded; any other inode yields an
    empty list. At most the first 4 extents are used, and at most the first
    8 blocks of each extent are returned.
    """
    (magic,) = struct.unpack_from('<H', inode_data, 40)
    if magic != 0xf30a:
        return []

    # Header fields after the magic: entry count, max entries, tree depth.
    entries, _max_entries, depth = struct.unpack_from('<HHH', inode_data, 42)
    found = []
    if depth != 0:
        # Index nodes (depth > 0) are not followed.
        return found

    for slot in range(min(entries, 4)):
        base = 52 + 12 * slot
        # Skip the 4-byte logical block; read length and 48-bit start block.
        ee_len, start_hi, start_lo = struct.unpack_from('<HHI', inode_data, base + 4)
        first = (start_hi << 32) | start_lo
        found.extend(range(first, first + min(ee_len, 8)))  # max 8 blocks per dir
    return found
# Main scan: walk every inode table from MIN_GROUP upward, pick out directory
# inodes, read their first data blocks and look for directory entries whose
# name is one of TARGETS.
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation -- confirm against the original file.

# Labels for the directory-entry type codes accepted by is_valid_dirent.
_TYPE_NAMES = {1: 'file', 2: 'dir', 7: 'link'}

# (target name, child inode) -> (parent inode, file type, child's group)
results = {}
print('Scanning inode tables directly...')
print(f'Groups to scan: {MIN_GROUP} to {NUM_GROUPS-1}')
print()
with open(DISKS[0], 'rb', buffering=0) as f:
    for group in range(MIN_GROUP, NUM_GROUPS):
        # Inode table for group N is at block 1070 + N*512
        it_block = 1070 + group * 512
        # Read entire inode table for this group
        inode_table = read_virt(f, it_block * BSIZE, IPG * INODE_SZ)
        for idx in range(IPG):
            inode_data = inode_table[idx * INODE_SZ:(idx + 1) * INODE_SZ]
            if len(inode_data) < INODE_SZ:
                # Table was cut short (e.g. end of device); nothing more here.
                break
            if not any(inode_data):
                continue
            mode = struct.unpack_from('<H', inode_data, 0)[0]
            links = struct.unpack_from('<H', inode_data, 26)[0]
            # Check if directory: mode & 0xf000 == 0x4000
            if (mode & 0xf000) != 0x4000:
                continue
            if links < 2:
                continue
            inode_num = group * IPG + idx + 1
            # Read directory data blocks and scan for targets
            for blk in parse_extents(inode_data):
                blk_data = read_virt(f, blk * BSIZE, BSIZE)
                for target in TARGETS:
                    # A name sits 8 bytes after the start of its entry, so the
                    # earliest useful match position is 8.
                    pos = blk_data.find(target, 8)
                    while pos != -1:
                        off = pos - 8
                        if is_valid_dirent(blk_data, off, target):
                            child_ino = struct.unpack_from('<I', blk_data, off)[0]
                            ftype = blk_data[off + 7]
                            child_grp = (child_ino - 1) // IPG
                            key = (target.decode(), child_ino)
                            if key not in results:
                                results[key] = (inode_num, ftype, child_grp)
                                # Children whose inode lives in a group below
                                # MIN_GROUP are in the zeroed region.
                                status = 'INTACT' if child_grp >= MIN_GROUP else 'LOST'
                                tname = _TYPE_NAMES.get(ftype, '?')
                                print(f'[{status}] {target.decode()!r:15s} '
                                      f'child_inode={child_ino:10d} '
                                      f'parent_inode={inode_num:10d} '
                                      f'type={tname}')
                        pos = blk_data.find(target, pos + 1)
        if group % 1000 == 0:
            print(f' Group {group}/{NUM_GROUPS}...', flush=True)
print()
print('=== SUMMARY ===')
for (name, child_ino), (parent_ino, ftype, grp) in sorted(results.items()):
    status = 'INTACT' if grp >= MIN_GROUP else 'LOST'
    tname = _TYPE_NAMES.get(ftype, '?')
    print(f'[{status}] {name!r:15s} '
          f'child={child_ino} parent={parent_ino} type={tname}')