Files
ext4recovery/test/scan.py
2026-04-30 11:04:05 +00:00

155 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Strict ext4 directory entry scanner for pterodactyl paths.
"""
import struct
# --- Scan geometry (all values in bytes) ---------------------------------
# Size of one block examined for directory entries.
BSIZE = 4096
# Amount read per I/O request: 128 units of 512 bytes (64 KiB).
# NOTE(review): presumably 128 sectors of 512 bytes - confirm.
CHUNK = 128 * 512
# Byte offset on each disk at which scanning begins.
# NOTE(review): the name suggests the start of a logical volume - confirm.
LV_START = 5120000 * 512

# Devices to scan, processed in exactly this order.
DISKS = [
    '/dev/sda',
    '/dev/sdd',
    '/dev/sdc',
    '/dev/sdb',
]

# Only the exact names we expect to find as directory entries.
EXACT_TARGETS = [b'pterodactyl', b'volumes', b'wings']
def is_valid_dirent(block, off, name):
    """Strictly validate an ext4 directory entry for ``name`` at ``off``.

    The entry layout read here is: 32-bit little-endian inode number,
    16-bit little-endian record length, 8-bit name length, 8-bit file
    type, followed by the name bytes.

    Args:
        block: Bytes-like buffer holding one filesystem block (at most
            BSIZE bytes are considered).
        off: Byte offset of the candidate entry inside ``block``.
        name: Exact name (bytes) the entry must carry.

    Returns:
        True only if every check passes, False otherwise. Never raises
        for short buffers or out-of-range offsets.
    """
    name_len = len(name)
    name_end = off + 8 + name_len

    # Validate against the real buffer as well as BSIZE so a truncated
    # block yields False instead of struct.error / IndexError.
    limit = min(BSIZE, len(block))
    if off < 0 or name_end > limit:
        return False

    # Every rec_len is a multiple of 4 and the first entry starts at
    # offset 0, so genuine entries always sit on 4-byte boundaries.
    if off % 4 != 0:
        return False

    inode, rec_len, stored_len, ftype = struct.unpack_from('<IHBB', block, off)

    # inode must be plausible (> 10, not absurdly large)
    if not 10 < inode < 500_000_000:
        return False

    # name_len must exactly match our target
    if stored_len != name_len:
        return False

    # rec_len must cover header + name and be 4-byte aligned
    min_rec = 8 + name_len
    if rec_len < min_rec or rec_len % 4 != 0:
        return False

    # A directory entry never crosses the end of its block (this also
    # implies rec_len <= BSIZE).
    if off + rec_len > limit:
        return False

    # file type must be one of the accepted ext4 types
    if ftype not in (1, 2, 7):  # file, dir, symlink only
        return False

    # the name bytes must match the target exactly
    if block[off + 8:name_end] != name:
        return False

    # The first padding byte after the name should be 0. Padding exists
    # only when rec_len is larger than header + name; otherwise the next
    # byte is the start of the following entry and may be anything.
    if rec_len > min_rec and block[name_end] != 0:
        return False

    return True
def scan_block(block, phys_base):
    """Find every valid directory entry for a target name in one block.

    Candidate positions are located with ``bytes.find`` (a C-level search)
    instead of slicing at every byte offset; each candidate is then
    confirmed with ``is_valid_dirent``.

    Args:
        block: ``bytes`` or ``bytearray`` holding one block of BSIZE bytes.
        phys_base: Physical byte offset of ``block`` on the disk; added to
            the in-block offset to form each hit's ``'phys'`` value.

    Returns:
        A list of hit dicts ordered by offset within the block, with keys
        ``phys``, ``inode``, ``name``, ``ftype``, ``group``, ``intact`` and
        ``rec_len``.
    """
    type_names = {1: 'file', 2: 'dir', 7: 'symlink'}
    found = []
    for target in EXACT_TARGETS:
        # The name starts 8 bytes after the entry header, so the earliest
        # possible match is at index 8; matches must lie inside the block.
        pos = block.find(target, 8, BSIZE)
        while pos != -1:
            off = pos - 8
            if is_valid_dirent(block, off, target):
                inode, rec_len = struct.unpack_from('<IH', block, off)
                ftype = block[off + 7]
                # NOTE(review): assumes 8192 inodes per block group and
                # that groups >= 13 are the intact ones - confirm against
                # the filesystem's superblock / damage extent.
                grp = (inode - 1) // 8192
                found.append((off, {
                    'phys': phys_base + off,
                    'inode': inode,
                    'name': target.decode(),
                    'ftype': type_names.get(ftype, '?'),
                    'group': grp,
                    'intact': grp >= 13,
                    'rec_len': rec_len,
                }))
            # Step by one so overlapping occurrences are not missed.
            pos = block.find(target, pos + 1, BSIZE)
    # Stable sort restores ascending-offset order across targets.
    found.sort(key=lambda item: item[0])
    return [hit for _, hit in found]
def iter_data_chunks(disk_path):
    """Yield ``(offset, data)`` for the chunks of a disk that get scanned.

    Walks the disk in CHUNK-byte steps starting at LV_START. Only whole
    chunks that fit before the end of the disk are visited, and every
    fifth chunk (chunk numbers 4, 9, 14, ...) is skipped.
    NOTE(review): the skipped chunk is presumably a non-data (e.g. parity)
    chunk of the array layout - confirm.

    Args:
        disk_path: Path of the device or image file to read.

    Yields:
        Tuples of the chunk's absolute byte offset and the bytes read there.
    """
    with open(disk_path, 'rb') as f:
        # whence=2 seeks relative to the end, so tell() is the total size.
        f.seek(0, 2)
        disk_size = f.tell()

        # The last start offset that still leaves a whole chunk is
        # disk_size - CHUNK, hence the exclusive stop of that value + 1.
        starts = range(LV_START, disk_size - CHUNK + 1, CHUNK)
        for chunk_num, phys in enumerate(starts):
            if chunk_num % 5 == 4:
                continue
            f.seek(phys)
            yield phys, f.read(CHUNK)
def main():
    """Scan every disk in DISKS and report matching directory entries.

    Each chunk from ``iter_data_chunks`` is cheaply pre-filtered for the
    target names; chunks that contain one are split into BSIZE blocks and
    passed to ``scan_block``. Hits are printed as they are found, a
    progress line is printed every 5000 chunks, and a summary grouped by
    (inode, name) is printed at the end.
    """
    from collections import defaultdict

    all_hits = []
    for disk_idx, disk in enumerate(DISKS):
        print(f'\nScanning {disk}...', flush=True)
        chunks = 0
        hits = 0
        for phys, chunk_data in iter_data_chunks(disk):
            # Pre-filter: only scan block by block if any target occurs.
            if any(t in chunk_data for t in EXACT_TARGETS):
                # Scan each 4KB block in chunk
                for blk in range(0, len(chunk_data), BSIZE):
                    block = chunk_data[blk:blk+BSIZE]
                    for hit in scan_block(block, phys + blk):
                        status = 'INTACT' if hit['intact'] else 'LOST'
                        print(f" [{status}] '{hit['name']}' "
                              f"inode={hit['inode']} "
                              f"group={hit['group']} "
                              f"type={hit['ftype']} "
                              f"phys={hit['phys']}")
                        all_hits.append((disk_idx, hit))
                        hits += 1
            # Count and report progress for every chunk, including ones
            # the pre-filter rejected; otherwise progress lines appear
            # only when a multiple-of-5000 chunk happens to hold a target.
            chunks += 1
            if chunks % 5000 == 0:
                gb = (phys - LV_START) / 1024**3
                print(f' {disk}: {gb:.1f}GB, {hits} hits', flush=True)
        print(f' Finished: {hits} hits')

    print('\n=== RESULTS ===')
    # Group by name and inode
    by_inode = defaultdict(list)
    for disk_idx, hit in all_hits:
        key = (hit['inode'], hit['name'])
        by_inode[key].append((DISKS[disk_idx], hit['phys']))
    print(f'\nUnique (inode, name) pairs: {len(by_inode)}')
    for (inode, name), locations in sorted(by_inode.items()):
        grp = (inode-1)//8192
        status = 'INTACT' if grp >= 13 else 'LOST'
        print(f" '{name}' inode={inode} group={grp} [{status}]")
        # Show at most the first three locations per pair.
        for disk, phys in locations[:3]:
            print(f" {disk} phys={phys}")
# Script entry point: run the scan only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()