Initial remote commit
This commit is contained in:
207
test/recursivedump.py
Normal file
207
test/recursivedump.py
Normal file
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Recursive ext4 directory dumper by inode number.
|
||||
Bypasses all metadata validation - uses extent trees directly.
|
||||
"""
|
||||
import struct, os, sys, stat
|
||||
from pathlib import Path
|
||||
|
||||
# Recovery target and filesystem geometry.
DEV = '/dev/dm-0'        # raw block device holding the damaged ext4 filesystem
BLOCK = 4096             # filesystem block size in bytes — assumed 4K; TODO confirm against s_log_block_size
BACKUP_SB_BLOCK = 32768  # first backup superblock (start of block group 1 for a 4K-block fs)
|
||||
|
||||
# ── low-level helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def read_at(f, offset, size):
    """Read *size* bytes from file object *f* at absolute byte *offset*."""
    f.seek(offset)
    data = f.read(size)
    return data
|
||||
|
||||
def parse_superblock(data):
    """Decode the ext4 superblock fields this tool needs into a dict.

    Offsets follow the on-disk ext4 superblock layout. A zero
    s_desc_size (filesystems without the 64-bit feature) falls back
    to the legacy 32-byte group-descriptor size.
    """
    layout = (
        ('inodes_count',     '<I', 0),
        ('blocks_count',     '<I', 4),
        ('blocks_per_group', '<I', 32),
        ('inodes_per_group', '<I', 40),
        ('inode_size',       '<H', 88),
        ('magic',            '<H', 56),
    )
    sb = {name: struct.unpack_from(fmt, data, off)[0]
          for name, fmt, off in layout}
    sb['desc_size'] = struct.unpack_from('<H', data, 254)[0] or 32
    return sb
|
||||
|
||||
def parse_gdt_entry(gdt_data, offset, desc_size):
    """Return the inode-table block number from one group descriptor.

    bg_inode_table_lo sits at descriptor offset 8; when descriptors
    are 64-bit sized (desc_size >= 64) the high half is at offset 40.
    """
    table = struct.unpack_from('<I', gdt_data, offset + 8)[0]
    if desc_size < 64:
        return table
    high = struct.unpack_from('<I', gdt_data, offset + 40)[0]
    return (high << 32) | table
|
||||
|
||||
def parse_extent_tree(data, inode_offset):
    """Parse the extent tree rooted in an inode's i_block area.

    Returns a list of (logical_block, physical_block, length) tuples.
    An invalid extent-header magic yields an empty list. For an index
    node (depth > 0) only the first child block is followed, returned
    as a single pseudo-extent of length 1 — a deliberate simplification
    that still handles large directories.
    """
    hdr = inode_offset + 40                         # i_block starts 40 bytes into the inode
    magic, entry_count, _max, depth = struct.unpack_from('<HHHH', data, hdr)
    if magic != 0xF30A:                             # ext4 extent-header magic
        return []

    if depth != 0:
        # Interior node: read the first ext4_extent_idx and report its
        # child block as a one-block extent.
        idx = hdr + 12
        leaf_lo = struct.unpack_from('<I', data, idx + 4)[0]
        leaf_hi = struct.unpack_from('<H', data, idx + 8)[0]
        return [(0, (leaf_hi << 32) | leaf_lo, 1)]

    found = []
    for n in range(min(entry_count, 4)):            # at most 4 extents fit inline in i_block
        rec = hdr + 12 + 12 * n
        if rec + 12 > len(data):
            break
        logical, raw_len, hi, lo = struct.unpack_from('<IHHI', data, rec)
        physical = (hi << 32) | lo
        if physical > 0:
            # Mask bit 15 of ee_len: it flags an unwritten extent.
            found.append((logical, physical, raw_len & 0x7FFF))
    return found
|
||||
|
||||
def read_inode(f, sb, gdt_data, inum):
    """Locate inode *inum* on disk; return (block_data, offset_in_block)."""
    index = inum - 1                                # inode numbers are 1-based
    group, within = divmod(index, sb['inodes_per_group'])
    table = parse_gdt_entry(gdt_data, group * sb['desc_size'], sb['desc_size'])
    raw_off = within * sb['inode_size']
    # Read the single filesystem block containing this inode record.
    block_data = read_at(f, (table + raw_off // BLOCK) * BLOCK, BLOCK)
    return block_data, raw_off % BLOCK
|
||||
|
||||
def read_dir_entries(f, sb, gdt_data, inum):
    """Read directory inode *inum*; return {name: (child_inode, file_type)}."""
    inode_blk, inode_off = read_inode(f, sb, gdt_data, inum)
    found = {}
    for _logical, phys, count in parse_extent_tree(inode_blk, inode_off):
        for n in range(count):
            try:
                block = read_at(f, (phys + n) * BLOCK, BLOCK)
                pos = 0
                # Walk the chained ext4_dir_entry_2 records in this block.
                while pos < BLOCK - 8:
                    ino, rec_len, name_len, ftype = \
                        struct.unpack_from('<IHBB', block, pos)
                    if rec_len < 8 or pos + rec_len > BLOCK:
                        break                       # corrupt record — stop scanning this block
                    if ino and name_len:
                        name = block[pos + 8:pos + 8 + name_len] \
                            .decode('utf-8', errors='replace')
                        found[name] = (ino, ftype)
                    pos += rec_len
            except OSError:
                pass                                # unreadable block — best-effort, keep going
    return found
|
||||
|
||||
def dump_file(f, sb, gdt_data, inum, dest_path):
    """Copy a regular file out of the filesystem by inode number.

    Returns True on success; False on OSError or when the inode uses
    inline data (contents stored in the inode body, not handled here).
    """
    try:
        inode_blk, off = read_inode(f, sb, gdt_data, inum)
        lo = struct.unpack_from('<I', inode_blk, off + 4)[0]     # i_size_lo
        hi = struct.unpack_from('<I', inode_blk, off + 108)[0]   # i_size_high
        total = (hi << 32) | lo
        extents = parse_extent_tree(inode_blk, off)

        # EXT4_INLINE_DATA_FL (0x10000000): data lives inside the inode.
        if struct.unpack_from('<I', inode_blk, off + 32)[0] & 0x10000000:
            return False

        copied = 0
        with open(dest_path, 'wb') as sink:
            # Sort by logical block so data is written in file order.
            for _logical, phys, count in sorted(extents):
                for n in range(count):
                    if copied >= total:
                        break
                    block = read_at(f, (phys + n) * BLOCK, BLOCK)
                    left = total - copied
                    sink.write(block if left >= BLOCK else block[:left])
                    copied += min(BLOCK, left)
        return True
    except OSError:
        return False
|
||||
|
||||
def dump_symlink(f, sb, gdt_data, inum, dest_path):
    """Recreate the symlink stored in inode *inum* at *dest_path*.

    Fast symlinks keep the target inline in the inode's i_block area
    (60 bytes, starting 40 bytes into the inode); longer targets live
    in a data block referenced by the extent tree.

    Returns True on success, False on any OS/parse failure.
    """
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size = struct.unpack_from('<I', idata, slot + 4)[0]
        # Fast symlink iff i_size < sizeof(i_block) == 60 — the kernel's
        # ext4_inode_is_fast_symlink test. The previous `size <= 60`
        # mis-read an exactly-60-byte target's extent header as link text.
        if size < 60:
            target = idata[slot+40:slot+40+size].decode('utf-8', errors='replace')
        else:
            extents = parse_extent_tree(idata, slot)
            if not extents:
                return False
            bdata = read_at(f, extents[0][1] * BLOCK, BLOCK)
            target = bdata[:size].decode('utf-8', errors='replace')
        os.symlink(target, dest_path)
        return True
    except (OSError, IndexError):
        return False
|
||||
|
||||
# ── recursive dumper ─────────────────────────────────────────────────────────
|
||||
|
||||
# ext4 directory-entry file_type codes (ext4_dir_entry_2.file_type).
FTYPE_REG = 1  # regular file
FTYPE_DIR = 2  # directory
FTYPE_SYM = 7  # symbolic link
|
||||
|
||||
def dump_tree(f, sb, gdt_data, inum, dest_dir, depth=0, visited=None):
    """Recursively extract directory inode *inum* into *dest_dir*.

    *visited* guards against inode cycles in corrupt directory data.
    Unknown file types are skipped; per-entry failures are reported to
    stderr but never abort the walk.
    """
    visited = set() if visited is None else visited
    if inum in visited:
        return
    visited.add(inum)

    try:
        children = read_dir_entries(f, sb, gdt_data, inum)
    except Exception:
        return                                  # unreadable directory — give up on this subtree

    os.makedirs(dest_dir, exist_ok=True)

    # Dispatch table: file_type code -> extraction routine.
    handlers = {
        FTYPE_DIR: lambda ci, d: dump_tree(f, sb, gdt_data, ci, d, depth + 1, visited),
        FTYPE_REG: lambda ci, d: dump_file(f, sb, gdt_data, ci, d),
        FTYPE_SYM: lambda ci, d: dump_symlink(f, sb, gdt_data, ci, d),
    }

    for name, (child_inum, ftype) in children.items():
        if name in ('.', '..'):
            continue
        # Strip characters that could escape or break the destination path.
        safe = name.replace('/', '_').replace('\x00', '')
        dest = os.path.join(dest_dir, safe)
        try:
            action = handlers.get(ftype)
            if action is not None:
                action(child_inum, dest)
        except Exception as e:
            print(f" WARN: {dest}: {e}", file=sys.stderr)
|
||||
|
||||
# ── main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """CLI entry point: dump the tree under <inode> from DEV into <dest_dir>."""
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <inode> <dest_dir>")
        sys.exit(1)

    try:
        root_inum = int(sys.argv[1])
    except ValueError:
        print(f"Invalid inode number: {sys.argv[1]!r}", file=sys.stderr)
        sys.exit(1)
    dest_dir = sys.argv[2]

    with open(DEV, 'rb') as f:
        # The primary superblock may be the damaged one — read the first
        # backup copy instead.
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = parse_superblock(sb_data)
        # Explicit check instead of `assert` (asserts vanish under -O).
        if sb['magic'] != 0xEF53:
            print(f"Bad superblock magic: {sb['magic']:#06x}", file=sys.stderr)
            sys.exit(1)

        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
                     // sb['blocks_per_group']
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * sb['desc_size'])

        print(f"Dumping inode {root_inum} -> {dest_dir}")
        dump_tree(f, sb, gdt_data, root_inum, dest_dir)
        print("Done")
|
||||
|
||||
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user