271 lines
10 KiB
Python
271 lines
10 KiB
Python
#!/usr/bin/env python3
"""
EXT4 Filesystem Libraries
"""
|
|
import struct, os, sys, stat
|
|
from pathlib import Path
|
|
|
|
# Block size in bytes; block numbers are multiplied by this to get byte offsets.
BLOCK = 4096
|
|
|
|
def read_at(f, offset, size):
    """Position ``f`` at ``offset`` and return what ``f.read(size)`` yields.

    The result may be shorter than ``size`` if the underlying file ends early;
    no length check is performed here.
    """
    f.seek(offset)
    payload = f.read(size)
    return payload
|
|
|
|
def parse_superblock(data):
    """Decode a handful of little-endian superblock fields from ``data``.

    Returns a dict with the keys ``inodes_count``, ``blocks_count``,
    ``blocks_per_group``, ``inodes_per_group``, ``inode_size``, ``magic`` and
    ``desc_size``.  A stored ``desc_size`` of 0 is reported as 32.

    Raises ``struct.error`` if ``data`` is too short for any field.
    """
    # (key, struct format, byte offset inside ``data``), in output order.
    layout = (
        ('inodes_count', '<I', 0),
        ('blocks_count', '<I', 4),
        ('blocks_per_group', '<I', 32),
        ('inodes_per_group', '<I', 40),
        ('inode_size', '<H', 88),
        ('magic', '<H', 56),
        ('desc_size', '<H', 254),
    )
    sb = {}
    for key, fmt, pos in layout:
        (sb[key],) = struct.unpack_from(fmt, data, pos)
    if not sb['desc_size']:
        sb['desc_size'] = 32
    return sb
|
|
|
|
def parse_gdt_entry(gdt_data, offset, desc_size):
    """Return the block number held in the group descriptor at ``offset``.

    The low 32 bits are read from ``offset + 8``.  When ``desc_size`` is at
    least 64, a further 32 bits from ``offset + 40`` are used as the high
    half.  ``read_inode`` treats the result as the first block of the group's
    inode table.
    """
    low_word = struct.unpack_from('<I', gdt_data, offset + 8)[0]
    if desc_size < 64:
        return low_word
    high_word = struct.unpack_from('<I', gdt_data, offset + 40)[0]
    return (high_word << 32) | low_word
|
|
|
|
def parse_extent_tree(data, inode_offset):
    """Parse the extent root stored inside an inode, without doing any I/O.

    ``data`` is a buffer containing the inode and ``inode_offset`` is where
    the inode starts; the extent header is read at ``inode_offset + 40``.

    Returns a list of ``(logical_block, phys_block, length)`` tuples:

    * depth 0 (leaf root): one tuple per extent, at most the 4 that fit in the
      inode, skipping extents whose physical start is 0;
    * depth > 0 (index root): a single ``(0, child_block, 1)`` tuple pointing
      at the first child node -- the child itself is NOT followed here;
    * ``[]`` when the header magic is not 0xF30A or an index root is empty.

    Raises ``struct.error`` if ``data`` is too short to hold the header.
    """
    base = inode_offset + 40
    magic, entries, _, depth = struct.unpack_from('<HHHH', data, base)
    if magic != 0xF30A:
        return []

    first_slot = base + 12  # entries start right after the 12-byte header

    if depth != 0:
        # Index root: report only the first child block.  An empty index or a
        # slot lying outside the buffer has nothing valid to report.
        if entries == 0 or first_slot + 12 > len(data):
            return []
        leaf_lo = struct.unpack_from('<I', data, first_slot + 4)[0]
        leaf_hi = struct.unpack_from('<H', data, first_slot + 8)[0]
        return [(0, (leaf_hi << 32) | leaf_lo, 1)]

    extents = []
    for i in range(min(entries, 4)):
        o = first_slot + i * 12
        if o + 12 > len(data):
            break
        l_block, ee_len, start_hi, start_lo = struct.unpack_from('<IHHI', data, o)
        phys = (start_hi << 32) | start_lo
        if phys > 0:
            # ee_len <= 32768 is an initialized extent of exactly that length;
            # larger values mark an unwritten extent of (ee_len - 32768)
            # blocks.  Masking with 0x7FFF would turn a full 32768-block
            # extent into length 0.
            length = ee_len if ee_len <= 32768 else ee_len - 32768
            extents.append((l_block, phys, length))
    return extents
|
|
|
|
def read_extent_tree_blocks(f, data, inode_offset):
    """
    Fully recursive extent tree walker.

    ``data`` holds the inode starting at ``inode_offset``; the extent root is
    read at ``inode_offset + 40`` and child nodes are fetched from ``f``.

    Returns a list of (logical_block, phys_block) pairs sorted by logical
    block, or ``[]`` when the root header magic is not 0xF30A.
    """
    base = inode_offset + 40
    magic, _, _, depth = struct.unpack_from('<HHHH', data, base)
    if magic != 0xF30A:
        return []
    # Sort explicitly so the documented ordering holds even when the on-disk
    # tree is out of order; some callers index the first element directly.
    return sorted(_walk_extent_node(f, data, base, depth))
|
|
|
|
def _walk_extent_node(f, data, base, depth):
    """Expand the extent node at ``data[base:]`` into per-block pairs.

    ``depth`` is the caller's notion of this node's level: 0 means the
    entries are extents, anything else means they are index entries whose
    child blocks are read from ``f`` and walked with ``depth - 1``.

    Returns an unsorted list of ``(logical_block, phys_block)`` pairs, one
    per block.  Best effort: a node that is too short, carries the wrong
    magic, or whose child cannot be read contributes nothing instead of
    aborting the walk.
    """
    if base + 12 > len(data):
        # Short or empty read (e.g. child block beyond the end of the image).
        return []
    magic, entries, _, _ = struct.unpack_from('<HHHH', data, base)
    if magic != 0xF30A:
        return []

    # Never trust the on-disk count beyond what actually fits in the buffer.
    slots = min(entries, (len(data) - base - 12) // 12)
    first_slot = base + 12
    result = []

    if depth == 0:
        # Leaf node - actual extents
        for i in range(slots):
            l_block, ee_len, start_hi, start_lo = struct.unpack_from(
                '<IHHI', data, first_slot + i * 12)
            phys = (start_hi << 32) | start_lo
            if phys > 0:
                # ee_len <= 32768: initialized extent of that many blocks;
                # above that: unwritten extent of (ee_len - 32768) blocks.
                # A plain 0x7FFF mask would drop full 32768-block extents.
                length = ee_len if ee_len <= 32768 else ee_len - 32768
                result.extend((l_block + b, phys + b) for b in range(length))
        return result

    # Index node - recurse into each child
    for i in range(slots):
        o = first_slot + i * 12
        leaf_lo = struct.unpack_from('<I', data, o + 4)[0]
        leaf_hi = struct.unpack_from('<H', data, o + 8)[0]
        leaf_block = (leaf_hi << 32) | leaf_lo
        try:
            child_data = read_at(f, leaf_block * BLOCK, BLOCK)
        except OSError:
            continue  # unreadable child: skip it, keep the siblings
        result.extend(_walk_extent_node(f, child_data, 0, depth - 1))
    return result
|
|
|
|
def read_inode(f, sb, gdt_data, inum):
    """Load the inode-table block that holds inode ``inum``.

    Returns ``(block_bytes, slot)`` where ``slot`` is the byte offset of the
    inode inside ``block_bytes``.  The group and in-group index come from
    ``inum - 1`` and ``sb['inodes_per_group']``; the table's first block is
    taken from the matching descriptor in ``gdt_data``.
    """
    grp, local_idx = divmod(inum - 1, sb['inodes_per_group'])
    desc_size = sb['desc_size']
    tbl_block = parse_gdt_entry(gdt_data, grp * desc_size, desc_size)
    # Split the inode's byte position in the table into (block, offset).
    blk_off, slot = divmod(local_idx * sb['inode_size'], BLOCK)
    return read_at(f, (tbl_block + blk_off) * BLOCK, BLOCK), slot
|
|
|
|
def classify_inode(idata, slot):
    """
    Classify the inode at ``idata[slot:]`` from its dtime and link count.

    Returns one of:

    * ``'deleted'``     - dtime set, no links
    * ``'corrupt'``     - dtime set but links remain (inconsistent)
    * ``'unallocated'`` - dtime clear, no links (should not appear in
      directory entries)
    * ``'active'``      - dtime clear, links present (normal live inode)

    Only the dtime field (``slot + 20``) and the link count (``slot + 26``)
    are read.  Raises ``struct.error`` if ``idata`` is too short for them.
    """
    dtime = struct.unpack_from('<I', idata, slot + 20)[0]
    links_count = struct.unpack_from('<H', idata, slot + 26)[0]

    if dtime != 0:
        return 'deleted' if links_count == 0 else 'corrupt'
    if links_count == 0:
        return 'unallocated'
    return 'active'
|
|
|
|
def read_dir_entries(f, sb, gdt_data, inum):
    """Return dict of name -> (child_inum, ftype) for directory inode ``inum``.

    Every data block of the directory is scanned as a chain of records laid
    out as ``<inode:u32, rec_len:u16, name_len:u8, ftype:u8, name...>``.
    Records with inode 0 or an empty name are skipped.  Names are decoded as
    UTF-8 with replacement characters for invalid bytes.

    Best effort: a block that cannot be read is skipped, and a truncated or
    malformed block stops being parsed at the first bad record without
    discarding the entries collected so far.
    """
    idata, slot = read_inode(f, sb, gdt_data, inum)
    entries = {}
    for _, phys in sorted(read_extent_tree_blocks(f, idata, slot)):
        try:
            bdata = read_at(f, phys * BLOCK, BLOCK)
        except OSError:
            continue

        # A short read (block past the end of the image) must not raise
        # struct.error and throw away what was already parsed.
        limit = min(len(bdata), BLOCK)
        offset = 0
        while offset < limit - 8:
            e_ino, rec_len, name_len, ftype = \
                struct.unpack_from('<IHBB', bdata, offset)
            if rec_len < 8 or offset + rec_len > limit:
                break
            # The name must fit inside its own record; otherwise the slice
            # would pull bytes belonging to the next record.
            if e_ino != 0 and 0 < name_len <= rec_len - 8:
                name = bdata[offset + 8:offset + 8 + name_len]\
                    .decode('utf-8', errors='replace')
                entries[name] = (e_ino, ftype)
            offset += rec_len
    return entries
|
|
|
|
def dump_file(f, sb, gdt_data, inum, dest_path):
    """Extract a regular file by inode to dest_path.

    The size is assembled from the 32-bit fields at inode offsets 4 (low) and
    108 (high).  If inode flag 0x10000000 (inline data) is set, the content is
    copied from the 60-byte area at inode offset 40; otherwise each mapped
    block is written at its logical position and unmapped ranges are left as
    holes.  The output is finally truncated/extended to the inode size.

    Returns True on success, False if an ``OSError`` occurred.
    """
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size_lo = struct.unpack_from('<I', idata, slot + 4)[0]
        size_hi = struct.unpack_from('<I', idata, slot + 108)[0]
        size = size_lo | (size_hi << 32)
        flags = struct.unpack_from('<I', idata, slot + 32)[0]

        if flags & 0x10000000:
            # Inline data lives in the 60-byte block-map area itself.  Bytes
            # past that belong to other inode fields, so never copy more than
            # 60; any overflow is stored elsewhere and is not recovered here.
            if size > 60:
                print(f"  WARN inline data for {dest_path} truncated to 60 of "
                      f"{size} bytes", file=sys.stderr)
            inline = idata[slot + 40:slot + 40 + min(size, 60)]
            with open(dest_path, 'wb') as out:
                out.write(inline)
            return True

        blocks = sorted(read_extent_tree_blocks(f, idata, slot))
        with open(dest_path, 'wb') as out:
            for logical, phys in blocks:
                position = logical * BLOCK
                remaining = size - position
                if remaining <= 0:
                    break  # sorted input: everything after is past EOF too
                chunk = read_at(f, phys * BLOCK, BLOCK)
                # Always seek to the block's own offset: this creates holes
                # for sparse files and keeps later blocks correctly placed
                # even if an earlier read came back short.
                out.seek(position)
                out.write(chunk[:min(BLOCK, remaining)])
            out.truncate(size)
        return True
    except OSError:
        return False
|
|
|
|
def dump_symlink(f, sb, gdt_data, inum, dest_path):
    """Recreate the symlink stored in inode ``inum`` at ``dest_path``.

    Targets shorter than 60 bytes are read directly from the inode at offset
    40 ("fast" symlink); longer ones are read from the first mapped data
    block.  The target is decoded as UTF-8 (invalid bytes replaced), cut at
    the first NUL and stripped of surrounding whitespace.

    Returns True if the link was created or ``dest_path`` already exists,
    False if the target is empty, contains control characters, has no data
    block, or an OS error occurred (a warning is printed to stderr).
    """
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size = struct.unpack_from('<I', idata, slot + 4)[0]
        # The in-inode area is 60 bytes and must also hold the terminating
        # NUL, so only targets of at most 59 bytes are stored inline.  A
        # 60-byte target lives in a data block and the area holds the extent
        # header instead.
        if size < 60:
            target = idata[slot+40:slot+40+size].decode('utf-8', errors='replace')
        else:
            extents = read_extent_tree_blocks(f, idata, slot)
            if not extents:
                return False
            # Pick the lowest logical block explicitly rather than relying on
            # the order in which the walker returned the pairs.
            first_phys = min(extents)[1]
            bdata = read_at(f, first_phys * BLOCK, BLOCK)
            target = bdata[:size].decode('utf-8', errors='replace')

        # Keep only the text before the first NUL, then trim whitespace
        target = target.split('\x00')[0].strip()

        if not target:
            print(f"  WARN empty symlink target for {dest_path}", file=sys.stderr)
            return False

        # Validate target looks like a path
        if any(ord(c) < 32 for c in target):
            print(f"  WARN control chars in symlink target {dest_path!r} -> {target!r}",
                  file=sys.stderr)
            return False

        if os.path.lexists(dest_path):
            return True  # already exists from a previous run

        os.symlink(target, dest_path)
        return True
    except (OSError, IndexError) as e:
        print(f"  WARN symlink {dest_path}: {e}", file=sys.stderr)
        return False
|
|
|
|
# ── recursive dumper ─────────────────────────────────────────────────────────
|
|
|
|
# Directory-entry file-type codes that dump_tree dispatches on
# (regular file, directory, symbolic link).
FTYPE_REG, FTYPE_DIR, FTYPE_SYM = 1, 2, 7
|
|
|
|
def dump_tree(f, sb, gdt_data, inum, dest_dir, depth=0, visited=None):
    """Recursively extract the directory at inode ``inum`` into ``dest_dir``.

    ``visited`` collects directory inode numbers already processed so that a
    directory reachable twice (e.g. through a corrupted loop) is dumped only
    once; ``depth`` is the current recursion level and is only passed along.

    Regular files, directories and symlinks are extracted; every other entry
    type is skipped.  Failures are reported on stderr and never abort the
    rest of the tree.
    """
    if visited is None:
        visited = set()
    if inum in visited:
        return
    visited.add(inum)

    try:
        entries = read_dir_entries(f, sb, gdt_data, inum)
    except Exception as e:
        # Best-effort boundary: an unreadable directory is skipped, but say so.
        print(f"  WARN: cannot read directory inode {inum} ({dest_dir}): {e}",
              file=sys.stderr)
        return

    os.makedirs(dest_dir, exist_ok=True)

    for name, (child_inum, ftype) in entries.items():
        if name in ('.', '..'):
            continue
        safe_name = name.replace('/', '_').replace('\x00', '')
        # Re-check AFTER sanitising: a name such as "..\x00" passes the test
        # above but collapses to "..", which would write outside dest_dir.
        if safe_name in ('', '.', '..'):
            print(f"  WARN: skipping unsafe entry name {name!r} in {dest_dir}",
                  file=sys.stderr)
            continue
        dest = os.path.join(dest_dir, safe_name)

        try:
            # If ftype unknown, derive from inode mode
            if ftype == 0:
                idata, slot = read_inode(f, sb, gdt_data, child_inum)
                mode = struct.unpack_from('<H', idata, slot)[0]
                itype = mode & 0xF000
                if itype == 0x4000:
                    ftype = FTYPE_DIR
                elif itype == 0x8000:
                    ftype = FTYPE_REG
                elif itype == 0xA000:
                    ftype = FTYPE_SYM

            if ftype == FTYPE_DIR:
                dump_tree(f, sb, gdt_data, child_inum, dest,
                          depth + 1, visited)
            elif ftype == FTYPE_REG:
                if not dump_file(f, sb, gdt_data, child_inum, dest):
                    print(f"  WARN: {dest}: file extraction failed",
                          file=sys.stderr)
            elif ftype == FTYPE_SYM:
                # dump_symlink prints its own warnings on failure
                dump_symlink(f, sb, gdt_data, child_inum, dest)
            # ftype still 0 after mode check = special file, skip

        except Exception as e:
            print(f"  WARN: {dest}: {e}", file=sys.stderr)
|