Initial remote commit

This commit is contained in:
2026-04-30 11:04:05 +00:00
commit b86e4f9a98
103 changed files with 262770 additions and 0 deletions

0
test/= Normal file
View File

1
test/INTERESTING_INODES Normal file
View File

@@ -0,0 +1 @@
3544785:etc directory (blech... from a container)

Binary file not shown.

Binary file not shown.

73
test/aa.sh Normal file
View File

@@ -0,0 +1,73 @@
# Inspect the ext4 group descriptor table (GDT) on the reassembled array:
# print its size and span, report which chunks of the chunked-LV layout
# cover it (every 5th chunk is metadata and is lost), then sample the first
# GDT entries back through the translated /dev/nbd0 device.
python3 -c "
import struct
# Read superblock
with open('/dev/md0','rb') as f:
    f.seek(2621441024) # physical primary sb
    sb = f.read(1024)
total_blocks = struct.unpack_from('<I', sb, 4)[0]
bpg = struct.unpack_from('<I', sb, 40)[0]
bsize = 4096
num_groups = (total_blocks + bpg - 1) // bpg
gdt_blocks = (num_groups * 64 + bsize - 1) // bsize # 64 bytes per GDT entry
print(f'Total blocks: {total_blocks}')
print(f'Blocks/group: {bpg}')
print(f'Num groups: {num_groups}')
print(f'GDT size: {num_groups * 64} bytes = {gdt_blocks} blocks')
print(f'GDT spans blocks 1 to {gdt_blocks}')
print()
# Check which physical chunks cover the GDT
CHUNK_BYTES = 128 * 512 # 64KB
LV_PHYS_START = 5120000 * 512
def v_to_p_byte(virt_byte):
    # Virtual->physical mapping: 5-chunk groups, the 5th chunk is metadata.
    group = virt_byte // (5 * CHUNK_BYTES)
    offset_in_group = virt_byte % (5 * CHUNK_BYTES)
    chunk_in_group = offset_in_group // CHUNK_BYTES
    intra = offset_in_group % CHUNK_BYTES
    if chunk_in_group == 4:
        return None # metadata chunk
    phys = (LV_PHYS_START +
            group * 4 * CHUNK_BYTES +
            chunk_in_group * CHUNK_BYTES +
            intra)
    return phys
# GDT spans virtual bytes 4096 to 4096+gdt_blocks*4096
gdt_start_v = 4096 # block 1
gdt_end_v = 4096 + gdt_blocks * 4096
print(f'GDT virtual bytes: {gdt_start_v} to {gdt_end_v}')
print()
# Check each chunk that covers the GDT
print('Chunks covering GDT:')
pos = gdt_start_v
while pos < gdt_end_v:
    chunk_in_group = (pos % (5 * CHUNK_BYTES)) // CHUNK_BYTES
    phys = v_to_p_byte(pos)
    chunk_end = pos + CHUNK_BYTES - (pos % CHUNK_BYTES)
    print(f' Virtual {pos}-{min(chunk_end,gdt_end_v)}: '
          f'chunk_type={chunk_in_group} '
          f'physical={phys} '
          f'{\"METADATA-LOST\" if phys is None else \"\"}')
    pos = chunk_end
# Read GDT from nbd and check first few entries
print()
print('GDT entries via NBD:')
with open('/dev/nbd0','rb') as f:
    f.seek(4096)
    gdt_data = f.read(min(gdt_blocks * 4096, 65536))
for i in range(min(10, num_groups)):
    entry = gdt_data[i*64:(i+1)*64]
    bb = struct.unpack_from('<I', entry, 0)[0]
    ib = struct.unpack_from('<I', entry, 4)[0]
    it = struct.unpack_from('<I', entry, 8)[0]
    cs = struct.unpack_from('<H', entry, 30)[0]
    print(f' Group {i}: bb={bb} ib={ib} it={it} csum=0x{cs:04x}')
"

10
test/batch_recover.sh Normal file
View File

@@ -0,0 +1,10 @@
# Dump the directory tree of every orphaned root inode listed in
# true_roots.txt into /mnt/recovered/apr29/<inode>/, several at a time.
while read inum rest; do
    mkdir -p /mnt/recovered/apr29/${inum}
    python3 dump_tree.py ${inum} /mnt/recovered/apr29/${inum}/ &
    # Throttle: allow at most 4 dump jobs in flight (the original comment
    # said 10, but the condition below caps at 4). `wait -n` returns as
    # soon as any job exits; the sleep is a fallback for shells without it.
    while (( $(jobs -r | wc -l) >= 4 )); do
        wait -n 2>/dev/null || sleep 0.2
    done
done < true_roots.txt
wait
echo "All done"

View File

@@ -0,0 +1,10 @@
# Resumable pass of the batch recovery: same per-inode dump, but skips
# destinations that are already populated (--skip-existing) and raises the
# parallelism cap to 10 jobs.
while read inum rest; do
    dest="/mnt/recovered/apr29/${inum}"
    mkdir -p "${dest}"
    python3 dump_tree.py --skip-existing ${inum} "${dest}/" &
    # Keep at most 10 dump jobs running at once.
    while (( $(jobs -r | wc -l) >= 10 )); do
        wait -n 2>/dev/null || sleep 0.2
    done
done < true_roots.txt
wait
echo "All done"

8
test/batch_restore.sh Normal file
View File

@@ -0,0 +1,8 @@
# Re-apply metadata (restore_meta.py) to every recovered tree under
# /mnt/recovered/apr29/<inode>/, at most 10 jobs in parallel.
while read inum rest; do
    python3 restore_meta.py ${inum} /mnt/recovered/apr29/${inum}/ &
    while (( $(jobs -r | wc -l) >= 10 )); do
        wait -n 2>/dev/null || sleep 0.2
    done
done < true_roots.txt
wait
echo "All done"

69
test/bb.sh Normal file
View File

@@ -0,0 +1,69 @@
# MySQL/MariaDB InnoDB pages are 16KB with recognizable structure
# Find them directly on the translated device
# (scans the whole virtual LV for pterodactyl / game-server marker strings
# and prints every hit with surrounding context, teed to /tmp/ptero_scan.txt)
python3 -c "
CHUNK_B = 128*512
LV_START = 5120000*512
VIRT_SIZE = 9365766144*512
def read_virt(offset, length):
    # Translate a virtual-LV read into /dev/md0 reads; every 5th chunk is
    # pool metadata with no backing data, so those ranges stay zero-filled.
    result = bytearray(length)
    pos = offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK_B)
            in_group = pos % (5*CHUNK_B)
            chunk_idx = in_group // CHUNK_B
            intra = in_group % CHUNK_B
            seg_len = min(CHUNK_B-intra, remaining)
            dst_off = pos - offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK_B + chunk_idx*CHUNK_B + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
targets = [
    b'pterodactyl',
    b'wings_token',
    b'server_id',
    b'eula.txt',
    b'server.properties',
    b'level.dat',
    b'bukkit.yml',
    b'spigot.yml',
]
print('Scanning for game server / pterodactyl data...')
chunk = 64*1024*1024
offset = 0
while offset < VIRT_SIZE:
    try:
        data = read_virt(offset, min(chunk, VIRT_SIZE-offset))
    except:
        # NOTE(review): bare except keeps the scan going past I/O errors,
        # but also hides real bugs.
        offset += chunk
        continue
    for target in targets:
        pos = 0
        while True:
            idx = data.find(target, pos)
            if idx < 0: break
            abs_byte = offset + idx
            ctx = data[max(0,idx-80):idx+120]
            print(f'{target.decode()!r} @ byte {abs_byte} sector {abs_byte//512}')
            try:
                print(f' {ctx.decode(\"latin1\",errors=\"replace\")}')
            except:
                pass
            print()
            pos = idx + 1
    offset += chunk
    if offset % (10*1024*1024*1024) == 0:
        print(f'--- {offset//1024**3}GB scanned ---', flush=True)
" 2>&1 | tee /tmp/ptero_scan.txt

85
test/build_merged.py Normal file
View File

@@ -0,0 +1,85 @@
import struct
CHUNK = 128*512
LV_START = 5120000*512
BSIZE = 4096
BPG = 32768
GDT_ENTRY = 64
NUM_GROUPS = 35728

def is_meta(virt_byte):
    """True when `virt_byte` falls in the 5th (metadata) chunk of its group."""
    return (virt_byte % (5 * CHUNK)) // CHUNK == 4

def raw_read(virt_offset, length):
    """Read `length` bytes at `virt_offset` of the virtual LV from /dev/md0.

    Virtual space is laid out in groups of 5 chunks; the 5th chunk of each
    group is pool metadata with no backing data, so those byte ranges are
    left zero-filled in the returned buffer.
    """
    buf = bytearray(length)
    end = virt_offset + length
    with open('/dev/md0','rb') as dev:
        cursor = virt_offset
        while cursor < end:
            group, in_group = divmod(cursor, 5 * CHUNK)
            chunk_idx, intra = divmod(in_group, CHUNK)
            span = min(CHUNK - intra, end - cursor)
            if chunk_idx != 4:
                dev.seek(LV_START + (group * 4 + chunk_idx) * CHUNK + intra)
                piece = dev.read(span)
                start = cursor - virt_offset
                buf[start:start + len(piece)] = piece
            cursor += span
    return bytes(buf)
# Build merged GDT: for each group, use whichever of primary/backup
# is NOT in a metadata chunk
print('Building merged GDT...')
primary_start = BSIZE # block 1
backup_start = (1*BPG + 1) * BSIZE # group 1 backup
# Read both GDTs in full
primary_gdt = raw_read(primary_start, NUM_GROUPS * GDT_ENTRY)
backup_gdt = raw_read(backup_start, NUM_GROUPS * GDT_ENTRY)
merged = bytearray(NUM_GROUPS * GDT_ENTRY)
primary_used = 0
backup_used = 0
neither = 0
for g in range(NUM_GROUPS):
    # Virtual-LV byte where this group's descriptor sits in each GDT copy;
    # is_meta() tells us whether that byte landed in a lost metadata chunk.
    prim_byte = primary_start + g * GDT_ENTRY
    backup_byte = backup_start + g * GDT_ENTRY
    src_off = g * GDT_ENTRY
    if not is_meta(prim_byte):
        # Primary is valid - use it
        merged[src_off:src_off+GDT_ENTRY] = primary_gdt[src_off:src_off+GDT_ENTRY]
        primary_used += 1
    elif not is_meta(backup_byte):
        # Primary is in metadata chunk - use backup
        merged[src_off:src_off+GDT_ENTRY] = backup_gdt[src_off:src_off+GDT_ENTRY]
        backup_used += 1
    else:
        # Both bad - shouldn't happen given our analysis
        neither += 1
print(f'From primary GDT: {primary_used}')
print(f'From backup GDT: {backup_used}')
print(f'Neither (error): {neither}')
assert neither == 0, 'Unexpected gap in coverage!'
# Verify merged GDT looks sane
print()
print('Sample entries:')
for g in [0, 1, 100, 1000, 32699, 35000, 35727]:
    # bb/ib/it = block bitmap, inode bitmap, inode table block numbers.
    e = merged[g*GDT_ENTRY:(g+1)*GDT_ENTRY]
    bb = struct.unpack_from('<I',e,0)[0]
    ib = struct.unpack_from('<I',e,4)[0]
    it = struct.unpack_from('<I',e,8)[0]
    cs = struct.unpack_from('<H',e,30)[0]
    print(f' Group {g:6d}: bb={bb:8d} ib={ib:8d} it={it:10d} csum=0x{cs:04x}')
with open('/tmp/merged_gdt.bin','wb') as f:
    f.write(merged)
print(f'Saved /tmp/merged_gdt.bin ({len(merged)//1024}KB)')

133
test/build_tree.py Normal file
View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
Build directory tree from ils output using fls parent pointers.
For each directory inode:
1. Run fls to get its contents and parent (..)
2. Record parent->child relationships
3. Walk parent chain to resolve full path
4. Place unreachable dirs in /orphans/
"""
import subprocess, sys, os, collections
DEVICE = '/dev/nbd0'
OUTDIR = '/mnt/recovered'
MIN_INODE = 106497 # first intact group
def fls(inode):
    """List directory entries of `inode` via sleuthkit's `fls`.

    Returns a list of (type_char, child_inode, name) tuples; an empty list
    when fls is missing, fails, times out, or produces unparsable output.
    """
    try:
        r = subprocess.run(
            ['fls', DEVICE, str(inode)],
            capture_output=True, text=True, timeout=30
        )
    except (subprocess.SubprocessError, OSError):
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit.
        return []
    entries = []
    for line in r.stdout.splitlines():
        # fls output: "<type> <inode>: <name>"; deleted entries carry '*'.
        parts = line.split(None, 2)
        if len(parts) < 3:
            continue
        type_str = parts[0]
        try:
            ino = int(parts[1].rstrip(':').lstrip('*'))
        except ValueError:
            continue  # header/noise line
        entries.append((type_str[0], ino, parts[2].strip()))
    return entries
# Load directory inodes from ils output
print('Loading directory inodes...')
dir_inodes = []
with open('/tmp/dir_inodes.txt') as f:
    for line in f:
        parts = line.strip().split('|')
        if len(parts) < 2: continue
        try:
            ino = int(parts[0])
            # Only keep inodes in groups whose metadata survived.
            if ino >= MIN_INODE:
                dir_inodes.append(ino)
        except: continue
print(f'Found {len(dir_inodes)} directory inodes to process')
# For each directory, get its parent and children
# parent_of[inode] = parent_inode
# children_of[inode] = [(child_inode, name, type)]
parent_of = {}
children_of = collections.defaultdict(list)
names = {} # inode -> name (as seen from parent)
print('Running fls on each directory...')
for idx, ino in enumerate(dir_inodes):
    entries = fls(ino)
    for etype, eino, ename in entries:
        if ename == '..':
            parent_of[ino] = eino
        elif ename == '.':
            continue
        else:
            children_of[ino].append((eino, ename, etype))
            names[eino] = ename
    if idx % 1000 == 0:
        print(f' {idx}/{len(dir_inodes)} processed...', flush=True)
print(f'Built tree: {len(parent_of)} dirs with known parents')
# Resolve full paths by walking parent chain
resolved = {} # inode -> full path
def resolve(ino, depth=0):
    # Memoized walk up the parent chain; the depth cap breaks cycles.
    if ino in resolved:
        return resolved[ino]
    if depth > 50:
        return None
    parent = parent_of.get(ino)
    if parent is None:
        path = f'orphans/{ino}'
        resolved[ino] = path
        return path
    # Check if parent is in intact groups
    if parent < MIN_INODE:
        # Parent is in zeroed groups — this is a root-level orphan
        # Use the name if we know it
        name = names.get(ino, str(ino))
        path = f'orphans/{name}_{ino}'
        resolved[ino] = path
        return path
    parent_path = resolve(parent, depth + 1)
    if parent_path is None:
        path = f'orphans/{ino}'
    else:
        name = names.get(ino, str(ino))
        path = os.path.join(parent_path, name)
    resolved[ino] = path
    return path
print('Resolving paths...')
for ino in dir_inodes:
    resolve(ino)
# Print summary
orphans = sum(1 for p in resolved.values() if p.startswith('orphans'))
resolved_count = len(resolved) - orphans
print(f'Resolved paths: {resolved_count}')
print(f'Orphaned dirs: {orphans}')
print()
# Show interesting paths
print('Sample resolved paths:')
for ino, path in sorted(resolved.items(), key=lambda x: x[1]):
    if any(x in path for x in ['var','pterodactyl','docker','mysql',
                               'www','log','lib']):
        print(f' inode {ino:10d}: {path}')
# Save full tree
with open('/tmp/dir_tree.txt','w') as f:
    for ino, path in sorted(resolved.items(), key=lambda x: x[1]):
        f.write(f'{ino}\t{path}\n')
print(f'Saved {len(resolved)} paths to /tmp/dir_tree.txt')

71
test/cc.sh Normal file
View File

@@ -0,0 +1,71 @@
# Same chunk-translated full-device scan as bb.sh, but only collects hit
# offsets per target string and prints a summary at the end.
python3 -c "
CHUNK_B = 128*512
LV_START = 5120000*512
VIRT_SIZE = 9365766144*512
def read_virt(offset, length):
    # Map virtual-LV offsets to /dev/md0; every 5th chunk (pool metadata)
    # has no backing data and reads back as zeros.
    result = bytearray(length)
    pos = offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK_B)
            in_group = pos % (5*CHUNK_B)
            chunk_idx = in_group // CHUNK_B
            intra = in_group % CHUNK_B
            seg_len = min(CHUNK_B-intra, remaining)
            dst_off = pos - offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK_B + chunk_idx*CHUNK_B + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
targets = [
    b'pterodactyl',
    b'wings_token',
    b'server.properties',
    b'level.dat',
    b'bukkit.yml',
    b'eula.txt',
    b'server_id',
]
print('Scanning...')
chunk = 64*1024*1024
offset = 0
hits = {}
while offset < VIRT_SIZE:
    try:
        data = read_virt(offset, min(chunk, VIRT_SIZE-offset))
    except:
        # NOTE(review): bare except keeps the scan going past I/O errors.
        offset += chunk
        continue
    for target in targets:
        pos = 0
        while True:
            idx = data.find(target, pos)
            if idx < 0: break
            abs_byte = offset + idx
            t = target.decode()
            if t not in hits:
                hits[t] = []
            hits[t].append(abs_byte)
            pos = idx + 1
    offset += chunk
    if offset % (10*1024*1024*1024) == 0:
        print(f'--- {offset//1024**3}GB scanned ---', flush=True)
print()
print('=== RESULTS ===')
for target, locations in hits.items():
    print(f'{target}: {len(locations)} hits')
    for loc in locations[:5]:
        print(f' byte {loc} sector {loc//512}')
" 2>&1 | tee /tmp/ptero_scan.txt

View File

@@ -0,0 +1,20 @@
# Report every orphaned root inode with its on-disk status and deletion time.
# NOTE(review): fragment of a larger script — relies on true_roots, DEV, sb,
# gdt_data, read_inode and classify_inode defined earlier in that file.
print(f"\nOrphaned roots: {len(true_roots)}")
print(f"{'inode':>12} {'parent':>12} {'status':>12} {'dtime':>12} reason")
print('-' * 75)
with open(DEV, 'rb') as f:
    for inum, parent, reason in sorted(true_roots):
        try:
            idata, slot = read_inode(f, sb, gdt_data, inum)
            status = classify_inode(idata, slot)
            # dtime lives at byte offset 20 of the on-disk inode.
            dtime = struct.unpack_from('<I', idata, slot + 20)[0]
            # Format dtime as human readable if set
            if dtime:
                import datetime
                dt = datetime.datetime.fromtimestamp(dtime).strftime('%Y-%m-%d %H:%M:%S')
            else:
                dt = 'never'
        except Exception:
            status, dt = 'unreadable', 'unknown'
        print(f"{inum:>12} {parent:>12} {status:>12} {dt:>19} {reason}")

46
test/dump_tree.py Normal file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python3
"""
Recursive ext4 directory dumper by inode number.
Bypasses all metadata validation - uses extent trees directly.
"""
import struct, os, sys, stat
from pathlib import Path
DEV = '/dev/dm-0'
BLOCK = 4096
BACKUP_SB_BLOCK = 32768
import ext4lib
# ── main ─────────────────────────────────────────────────────────────────────
import argparse
def main():
    """CLI entry point: recover one directory tree by its root inode number."""
    ap = argparse.ArgumentParser(description='Recover ext4 directory tree by inode')
    ap.add_argument('inode', type=int, help='Root inode number')
    ap.add_argument('dest', help='Destination directory')
    ap.add_argument('--skip-existing', action='store_true',
                    help='Skip recovery if destination directory already exists and is non-empty')
    args = ap.parse_args()

    # A non-empty destination means a previous run already handled this inode.
    if args.skip_existing and os.path.isdir(args.dest) and os.listdir(args.dest):
        print(f"Skipping inode {args.inode} -> {args.dest} (already exists)")
        sys.exit(0)

    with open(DEV, 'rb') as dev:
        # Use the superblock copy at BACKUP_SB_BLOCK rather than block 0.
        sb = ext4lib.parse_superblock(
            ext4lib.read_at(dev, BACKUP_SB_BLOCK * BLOCK, 1024))
        assert sb['magic'] == 0xef53
        groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        gdt = ext4lib.read_at(dev, (BACKUP_SB_BLOCK + 1) * BLOCK,
                              groups * sb['desc_size'])
        print(f"Dumping inode {args.inode} -> {args.dest}")
        ext4lib.dump_tree(dev, sb, gdt, args.inode, args.dest)
        print(f"Done inode {args.inode}")
if __name__ == '__main__':
    main()

270
test/ext4lib.py Normal file
View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
EXT4 Filesystem Libraries
"""
import struct, os, sys, stat
from pathlib import Path
BLOCK=4096
def read_at(f, offset, size):
    """Return up to `size` bytes read from `f` starting at absolute `offset`."""
    f.seek(offset)
    data = f.read(size)
    return data
def parse_superblock(data):
    """Decode the handful of ext4 superblock fields this tool needs.

    `data` is the raw superblock (first 1024 bytes). Returns a dict with
    inode/block counts, group geometry, inode size, the magic number, and
    the group-descriptor size (defaulting to 32 when the stored field is 0).
    """
    def u32(off):
        return struct.unpack_from('<I', data, off)[0]

    def u16(off):
        return struct.unpack_from('<H', data, off)[0]

    return {
        'inodes_count': u32(0),
        'blocks_count': u32(4),
        'blocks_per_group': u32(32),
        'inodes_per_group': u32(40),
        'inode_size': u16(88),
        'magic': u16(56),
        'desc_size': u16(254) or 32,
    }
def parse_gdt_entry(gdt_data, offset, desc_size):
    """Return the inode-table block number from one group descriptor.

    Reads the low 32 bits at `offset`+8; for 64-byte descriptors the high
    32 bits at `offset`+40 are OR-ed in.
    """
    table = struct.unpack_from('<I', gdt_data, offset + 8)[0]
    if desc_size >= 64:
        table |= struct.unpack_from('<I', gdt_data, offset + 40)[0] << 32
    return table
def parse_extent_tree(data, inode_offset):
    """Parse the extent header stored inside an inode (i_block area, +40).

    For a depth-0 (leaf) header, returns a list of
    (logical_block, physical_block, length) tuples — the inode body holds at
    most 4 records. For an index node only the first child block is returned
    as a pseudo-extent of length 1. Returns [] on a bad magic number.
    """
    hdr = inode_offset + 40
    magic, entries, _, depth = struct.unpack_from('<HHHH', data, hdr)
    if magic != 0xF30A:
        return []
    found = []
    if depth:
        # Index node: follow only the first child (enough for the large
        # directories this tool has to handle).
        rec = hdr + 12
        child_lo = struct.unpack_from('<I', data, rec + 4)[0]
        child_hi = struct.unpack_from('<H', data, rec + 8)[0]
        found.append((0, (child_hi << 32) | child_lo, 1))
        return found
    for idx in range(min(entries, 4)):
        rec = hdr + 12 + idx * 12
        if rec + 12 > len(data):
            break
        logical = struct.unpack_from('<I', data, rec)[0]
        raw_len = struct.unpack_from('<H', data, rec + 4)[0]
        hi = struct.unpack_from('<H', data, rec + 6)[0]
        lo = struct.unpack_from('<I', data, rec + 8)[0]
        phys = (hi << 32) | lo
        if phys > 0:
            # High bit of ee_len flags an unwritten extent; mask it off.
            found.append((logical, phys, raw_len & 0x7FFF))
    return found
def read_extent_tree_blocks(f, data, inode_offset):
    """Walk the full extent tree rooted in an inode.

    Returns (logical_block, physical_block) pairs for every mapped block,
    or an empty list when the root header's magic is wrong.
    """
    root = inode_offset + 40
    magic, _entries, _, depth = struct.unpack_from('<HHHH', data, root)
    if magic == 0xF30A:
        return _walk_extent_node(f, data, root, depth)
    return []
def _walk_extent_node(f, data, base, depth):
    """Recursive helper: expand one extent node into (logical, phys) pairs.

    `base` is the byte offset of the node header inside `data`; depth 0 is a
    leaf of real extents, anything else an index node whose child blocks are
    read from the device `f`. Unreadable children are silently skipped.
    """
    magic, entries, _, _ = struct.unpack_from('<HHHH', data, base)
    if magic != 0xF30A:
        return []
    pairs = []
    for i in range(entries):
        rec = base + 12 + 12 * i
        if depth == 0:
            logical = struct.unpack_from('<I', data, rec)[0]
            count = struct.unpack_from('<H', data, rec + 4)[0] & 0x7FFF
            hi = struct.unpack_from('<H', data, rec + 6)[0]
            lo = struct.unpack_from('<I', data, rec + 8)[0]
            phys = (hi << 32) | lo
            if phys > 0:
                pairs.extend((logical + k, phys + k) for k in range(count))
        else:
            lo = struct.unpack_from('<I', data, rec + 4)[0]
            hi = struct.unpack_from('<H', data, rec + 8)[0]
            child = (hi << 32) | lo
            try:
                child_data = read_at(f, child * BLOCK, BLOCK)
                pairs.extend(_walk_extent_node(f, child_data, 0, depth - 1))
            except OSError:
                pass
    return pairs
def read_inode(f, sb, gdt_data, inum):
    """Locate inode `inum` on disk.

    Looks up its group's inode-table block through the GDT, reads the 4K
    block containing the inode, and returns (block_data, byte_offset).
    """
    grp, local_idx = divmod(inum - 1, sb['inodes_per_group'])
    table = parse_gdt_entry(gdt_data, grp * sb['desc_size'], sb['desc_size'])
    blk, slot = divmod(local_idx * sb['inode_size'], BLOCK)
    return read_at(f, (table + blk) * BLOCK, BLOCK), slot
def classify_inode(idata, slot):
    """Classify the inode stored at byte `slot` of block `idata`.

    Uses the deletion time (dtime, +20) and hard-link count (+26):
      'deleted'     - dtime set, no links: cleanly deleted inode
      'corrupt'     - dtime set but links remain: inconsistent metadata
      'unallocated' - no dtime, no links: never-used slot
      'active'      - no dtime, links > 0: normal live inode
    """
    # The original also read mode (+0) and flags (+32) but never used them;
    # those dead loads are dropped.
    links_count = struct.unpack_from('<H', idata, slot + 26)[0]
    dtime = struct.unpack_from('<I', idata, slot + 20)[0]
    if dtime != 0:
        return 'deleted' if links_count == 0 else 'corrupt'
    return 'unallocated' if links_count == 0 else 'active'
def read_dir_entries(f, sb, gdt_data, inum):
    """Return dict of name -> (child_inum, ftype)."""
    idata, slot = read_inode(f, sb, gdt_data, inum)
    entries = {}
    # Walk every data block of the directory in logical order.
    for logical, phys in sorted(read_extent_tree_blocks(f, idata, slot)):
        try:
            bdata = read_at(f, phys * BLOCK, BLOCK)
            offset = 0
            # Classic ext4 dirent: inode(4) rec_len(2) name_len(1) ftype(1) name.
            while offset < BLOCK - 8:
                e_ino, rec_len, name_len, ftype = \
                    struct.unpack_from('<IHBB', bdata, offset)
                # rec_len < 8 or overrunning the block means corruption; stop.
                if rec_len < 8 or offset + rec_len > BLOCK:
                    break
                # e_ino == 0 marks a deleted/empty dirent slot.
                if e_ino != 0 and name_len > 0:
                    name = bdata[offset+8:offset+8+name_len]\
                        .decode('utf-8', errors='replace')
                    entries[name] = (e_ino, ftype)
                offset += rec_len
        except OSError:
            # Unreadable block (hole in the reconstructed device): skip it.
            pass
    return entries
def dump_file(f, sb, gdt_data, inum, dest_path):
    """Extract a regular file by inode to dest_path."""
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        # File size is split across two 32-bit fields (lo at +4, hi at +108).
        size_lo = struct.unpack_from('<I', idata, slot + 4)[0]
        size_hi = struct.unpack_from('<I', idata, slot + 108)[0]
        size = size_lo | (size_hi << 32)
        flags = struct.unpack_from('<I', idata, slot + 32)[0]
        if flags & 0x10000000:
            # Inline data - stored in inode body after extent header
            inline = idata[slot+40:slot+40+size]
            with open(dest_path, 'wb') as out:
                out.write(inline)
            return True
        blocks = sorted(read_extent_tree_blocks(f, idata, slot))
        written = 0
        with open(dest_path, 'wb') as out:
            # Handle sparse files - fill holes with zeros
            for logical, phys in blocks:
                hole = logical * BLOCK
                if hole > written:
                    # Gap before this extent: seek forward, leaving a hole.
                    out.seek(hole)
                    written = hole
                remaining = size - written
                if remaining <= 0:
                    break
                chunk = read_at(f, phys * BLOCK, BLOCK)
                out.write(chunk[:min(BLOCK, remaining)])
                written += min(BLOCK, remaining)
            # Extend/trim to the exact recorded size (trailing-hole case).
            out.truncate(size)
        return True
    except OSError:
        return False
def dump_symlink(f, sb, gdt_data, inum, dest_path):
    """Recreate the symlink stored in inode `inum` at dest_path.

    Returns True on success; False (with a warning on stderr) when the
    target is empty, contains control characters, or cannot be read.
    """
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size = struct.unpack_from('<I', idata, slot + 4)[0]
        if size <= 60:
            # Fast symlink: target stored directly in the inode's i_block area.
            target = idata[slot+40:slot+40+size].decode('utf-8', errors='replace')
        else:
            # Long target lives in a data block referenced by the extent tree.
            extents = read_extent_tree_blocks(f, idata, slot)
            if not extents:
                return False
            bdata = read_at(f, extents[0][1] * BLOCK, BLOCK)
            target = bdata[:size].decode('utf-8', errors='replace')
        # Strip null terminator, control characters, and anything after first null
        target = target.split('\x00')[0].strip()
        if not target:
            print(f" WARN empty symlink target for {dest_path}", file=sys.stderr)
            return False
        # Validate target looks like a path
        if any(ord(c) < 32 for c in target):
            print(f" WARN control chars in symlink target {dest_path!r} -> {target!r}",
                  file=sys.stderr)
            return False
        if os.path.lexists(dest_path):
            return True # already exists from a previous run
        os.symlink(target, dest_path)
        return True
    except (OSError, IndexError) as e:
        print(f" WARN symlink {dest_path}: {e}", file=sys.stderr)
        return False
# ── recursive dumper ─────────────────────────────────────────────────────────
# ext4 dirent file types this tool materializes; others (devices, sockets,
# fifos) are skipped.
FTYPE_REG = 1
FTYPE_DIR = 2
FTYPE_SYM = 7
def dump_tree(f, sb, gdt_data, inum, dest_dir, depth=0, visited=None):
    """Recursively extract directory inode `inum` into dest_dir.

    `visited` guards against cycles from corrupt directory entries; failures
    on individual entries are logged to stderr and skipped.
    """
    if visited is None:
        visited = set()
    if inum in visited:
        return
    visited.add(inum)
    try:
        entries = read_dir_entries(f, sb, gdt_data, inum)
    except Exception:
        return
    os.makedirs(dest_dir, exist_ok=True)
    for name, (child_inum, ftype) in entries.items():
        if name in ('.', '..'):
            continue
        # Sanitize names so they cannot escape dest_dir or embed NULs.
        safe_name = name.replace('/', '_').replace('\x00', '')
        dest = os.path.join(dest_dir, safe_name)
        try:
            # If ftype unknown, derive from inode mode
            if ftype == 0:
                idata, slot = read_inode(f, sb, gdt_data, child_inum)
                mode = struct.unpack_from('<H', idata, slot)[0]
                itype = mode & 0xF000
                if itype == 0x4000: ftype = FTYPE_DIR
                elif itype == 0x8000: ftype = FTYPE_REG
                elif itype == 0xA000: ftype = FTYPE_SYM
            if ftype == FTYPE_DIR:
                dump_tree(f, sb, gdt_data, child_inum, dest,
                          depth+1, visited)
            elif ftype == FTYPE_REG:
                dump_file(f, sb, gdt_data, child_inum, dest)
            elif ftype == FTYPE_SYM:
                dump_symlink(f, sb, gdt_data, child_inum, dest)
            # ftype still 0 after mode check = special file, skip
        except Exception as e:
            print(f" WARN: {dest}: {e}", file=sys.stderr)

35
test/ff.sh Normal file
View File

@@ -0,0 +1,35 @@
# Search for /var related strings in the assembled array
# Things that would only appear in /var:
python3 -c "
import os
targets = [
b'/var/log/syslog',
b'/var/lib/apt',
b'/var/cache',
b'dpkg/status',
b'apt/lists',
b'journald',
b'/var/log/auth.log',
]
with open('/dev/nbd0', 'rb') as f:
chunk = 128*1024*1024
offset = 0
limit = 50*1024*1024*1024
while offset < limit:
f.seek(offset)
data = f.read(chunk)
if not data: break
for target in targets:
pos = data.find(target)
if pos >= 0:
abs_byte = offset + pos
ctx = data[max(0,pos-50):pos+100]
print(f'{target.decode()!r} at byte {abs_byte}')
print(f' {ctx.decode(\"latin1\",errors=\"replace\")}')
print()
offset += chunk
if offset % (1024*1024*1024) == 0:
print(f'Scanned {offset//1024//1024//1024}GB...',flush=True)
" 2>&1 | grep -v "^Scanned"

69
test/find_parent.py Normal file
View File

@@ -0,0 +1,69 @@
import subprocess
DEVICE = '/dev/nbd0'
def fls(inode):
    """Run sleuthkit's `fls` on DEVICE for `inode` and return its stdout.

    Returns '' when fls is missing, fails to start, or exceeds the 30s
    timeout.
    """
    try:
        r = subprocess.run(['fls', DEVICE, str(inode)],
                           capture_output=True, text=True, timeout=30)
        return r.stdout
    except (subprocess.SubprocessError, OSError):
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit.
        return ''
def get_parent_and_name(inode):
    """Parse the fls listing of `inode`.

    Returns (parent_inode_or_None, children) where parent comes from the
    '..' entry and children is a list of (type_char, inode, name) tuples
    excluding '.' and '..'.
    """
    output = fls(inode)
    parent = None
    children = []
    for line in output.splitlines():
        # fls line format: "<type> <inode>: <name>"; '*' marks deleted.
        parts = line.split(None, 2)
        if len(parts) < 3:
            continue
        type_str = parts[0]
        try:
            ino = int(parts[1].rstrip(':').lstrip('*'))
        except ValueError:
            # Narrowed from a bare `except:`; skip header/noise lines.
            continue
        name = parts[2].strip()
        if name == '..':
            parent = ino
        elif name != '.':
            children.append((type_str[0], ino, name))
    return parent, children
# Walk up from pterodactyl
print('Walking up from pterodactyl (inode 1574102)...')
chain = [(1574102, 'pterodactyl')]
inode = 1574102
for _ in range(20):  # hard cap on chain length guards against loops
    parent, _ = get_parent_and_name(inode)
    if parent is None or parent == inode:
        break
    grp = (parent-1)//8192  # 8192 inodes per group on this filesystem
    # Get parent's name by looking at its own .. entry
    parent_parent, parent_children = get_parent_and_name(parent)
    # Find our name in parent's listing
    # NOTE(review): this looks up the *current* inode's name in the parent's
    # listing but records it against the parent's number — confirm intended.
    name = f'inode_{parent}'
    for t,i,n in parent_children:
        if i == inode:
            name = n
            break
    # Groups below 13 were zeroed out; their inodes are unrecoverable.
    status = 'INTACT' if grp >= 13 else 'LOST'
    print(f' [{status}] inode {parent} = {name!r} (group {grp})')
    chain.append((parent, name))
    if grp < 13:
        print(f' Reached zeroed region at inode {parent} - stopping')
        break
    inode = parent
print()
print('Chain (bottom to top):')
for ino, name in chain:
    print(f' {name} (inode {ino})')
# The highest intact inode is our extraction root
top_inode, top_name = chain[-1]
grp = (top_inode-1)//8192
if grp >= 13:
    print(f'\nExtraction root: inode {top_inode} ({top_name!r})')
else:
    # Use second to last
    top_inode, top_name = chain[-2]
    print(f'\nExtraction root: inode {top_inode} ({top_name!r})')

6415
test/fls_dirs.txt Normal file

File diff suppressed because it is too large Load Diff

41
test/gg.sh Normal file
View File

@@ -0,0 +1,41 @@
# Quick presence scan: count pterodactyl / Minecraft / docker marker strings
# in the first 100GB of the translated device and summarize hit counts.
python3 -c "
targets = [
    b'pterodactyl',
    b'/var/lib/pterodactyl',
    b'server.properties', # Minecraft
    b'level.dat', # Minecraft world
    b'bukkit.yml',
    b'spigot.yml',
    b'paper.yml',
    b'eula.txt',
    b'/var/lib/docker',
    b'wings',
    b'daemon.json',
]
with open('/dev/nbd0', 'rb') as f:
    chunk = 128*1024*1024
    offset = 0
    limit = 100*1024*1024*1024 # first 100GB
    found = {}
    while offset < limit:
        f.seek(offset)
        data = f.read(chunk)
        if not data: break
        # NOTE(review): matches straddling a 128MB chunk boundary are missed.
        for target in targets:
            pos = 0
            while True:
                idx = data.find(target, pos)
                if idx < 0: break
                abs_byte = offset + idx
                if target not in found:
                    found[target] = []
                found[target].append(abs_byte)
                pos = idx + 1
        offset += chunk
        if offset % (2*1024*1024*1024) == 0:
            print(f'Scanned {offset//1024//1024//1024}GB...',flush=True)
    for t,locs in found.items():
        print(f'{t.decode(\"latin1\")}: {len(locs)} hits, first at byte {locs[0]}')
" 2>&1

54
test/hh.sh Normal file
View File

@@ -0,0 +1,54 @@
# Sanity-check the tail of the virtual disk: read the last 8MB through the
# chunk translation and report how much of it is non-zero.
python3 -c "
CHUNK = 128*512
LV_START = 5120000*512
VIRT_SIZE = 9372172288*512
def read_virt(offset, length):
    # Virtual->physical translation; metadata chunks read back as zeros.
    result = bytearray(length)
    pos = offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK)
            in_group = pos % (5*CHUNK)
            chunk_idx = in_group // CHUNK
            intra = in_group % CHUNK
            seg_len = min(CHUNK-intra, remaining)
            dst_off = pos - offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK + chunk_idx*CHUNK + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
# Check last 8MB of virtual disk
last_8mb_start = VIRT_SIZE - 8*1024*1024
print(f'Checking last 8MB: virtual bytes {last_8mb_start} to {VIRT_SIZE}')
print(f'= virtual sectors {last_8mb_start//512} to {VIRT_SIZE//512}')
data = read_virt(last_8mb_start, 8*1024*1024)
nonzero = sum(1 for b in data if b != 0)
# Count distinct runs of consecutive zero bytes.
zero_runs = 0
in_zero = False
for b in data:
    if b == 0 and not in_zero:
        in_zero = True
        zero_runs += 1
    elif b != 0:
        in_zero = False
print(f'Non-zero bytes: {nonzero} / {8*1024*1024}')
print(f'Zero runs: {zero_runs}')
print(f'First 32 bytes: {data[:32].hex()}')
print(f'Last 32 bytes: {data[-32:].hex()}')
# Find first non-zero from the end
for i in range(len(data)-1, -1, -1):
    if data[i] != 0:
        print(f'Last non-zero byte at offset {i} from last_8mb_start')
        print(f'= virtual byte {last_8mb_start+i}')
        break
"

266
test/inode.list.txt Normal file
View File

@@ -0,0 +1,266 @@
Device: /dev/nbd0
Scanning groups 13 to 35727...
Group 1000/35728...
Group 2000/35728...
Group 3000/35728...
Group 4000/35728...
Group 5000/35728...
Group 6000/35728...
Group 7000/35728...
Group 8000/35728...
Group 9000/35728...
Group 10000/35728...
[INTACT] 'mysql' child= 1315529 parent= 83874094 type=dir
Group 11000/35728...
[INTACT] 'apache2' child= 1441863 parent= 92258147 type=dir
Group 12000/35728...
[INTACT] 'apache2' child= 1572931 parent= 100646751 type=dir
[INTACT] 'pterodactyl' child= 1574102 parent= 100647925 type=dir
[INTACT] 'log' child= 4473829 parent= 100648635 type=dir
[INTACT] 'log' child= 1590880 parent= 100664187 type=dir
[INTACT] 'log' child= 1590928 parent= 100664751 type=dir
[INTACT] 'archives' child= 1590964 parent= 100664787 type=dir
[INTACT] 'mysql' child= 1591190 parent= 100664840 type=dir
[INTACT] 'mysql' child= 1594026 parent= 100666127 type=link
[INTACT] 'mysql' child= 1593996 parent= 100667804 type=dir
[INTACT] 'log' child= 1703998 parent= 100678162 type=dir
Group 13000/35728...
[INTACT] 'docker' child= 1704222 parent= 109035581 type=file
[INTACT] 'log' child= 1708761 parent= 109039668 type=dir
[INTACT] 'log' child= 1713577 parent= 109044468 type=dir
[INTACT] 'archives' child= 1713111 parent= 109044470 type=dir
Group 14000/35728...
Group 15000/35728...
[INTACT] 'archives' child= 1976627 parent= 125823058 type=dir
[INTACT] 'mysql' child= 1976852 parent= 125823110 type=dir
[INTACT] 'mysql' child= 1979688 parent= 125824397 type=link
[INTACT] 'log' child= 1991889 parent= 125836432 type=dir
Group 16000/35728...
[INTACT] 'log' child= 2097588 parent= 134201370 type=dir
Group 17000/35728...
Group 18000/35728...
[INTACT] 'log' child= 2371446 parent= 150990428 type=dir
[INTACT] 'archives' child= 2371391 parent= 150990430 type=dir
[INTACT] 'log' child= 2372407 parent= 150991423 type=dir
[INTACT] 'mysql' child= 2385299 parent= 151004317 type=dir
Group 19000/35728...
[INTACT] 'log' child= 2498053 parent= 159374142 type=dir
[INTACT] 'log' child= 2498610 parent= 159375135 type=dir
[INTACT] 'archives' child= 2498562 parent= 159375137 type=dir
[INTACT] 'log' child= 2502619 parent= 159379170 type=dir
[INTACT] 'archives' child= 2502597 parent= 159379172 type=dir
[INTACT] 'pterodactyl' child= 2502697 parent= 159379272 type=dir
[INTACT] 'pterodactyl' child= 2502709 parent= 159379273 type=link
Group 20000/35728...
Group 21000/35728...
[INTACT] 'mysql' child= 2754290 parent= 176145895 type=dir
Group 22000/35728...
Group 23000/35728...
[INTACT] 'log' child= 3026191 parent= 192932796 type=dir
[INTACT] 'archives' child= 3026079 parent= 192932798 type=dir
[INTACT] 'log' child= 3026239 parent= 192932938 type=dir
[INTACT] 'docker' child= 3026262 parent= 192932981 type=file
[INTACT] 'log' child= 3032931 parent= 192939639 type=dir
Group 24000/35728...
[INTACT] 'log' child= 3149355 parent= 201313102 type=dir
[INTACT] 'archives' child= 3148850 parent= 201313105 type=dir
[INTACT] 'log' child= 3151959 parent= 201315933 type=dir
[INTACT] 'log' child= 3156862 parent= 201320877 type=dir
[INTACT] 'log' child= 3160172 parent= 201324390 type=dir
[INTACT] 'archives' child= 3160137 parent= 201324392 type=dir
[INTACT] 'log' child= 3164477 parent= 201328491 type=dir
[INTACT] 'mysql' child= 3164741 parent= 201328825 type=dir
Group 25000/35728...
[INTACT] 'mysql' child= 3280643 parent= 209702403 type=dir
[INTACT] 'mysql' child= 3281131 parent= 209702916 type=dir
[INTACT] 'mysql' child= 9046451 parent= 210222501 type=dir
[INTACT] 'log' child= 9044379 parent= 210223241 type=dir
[INTACT] 'log' child= 15349367 parent= 210324339 type=dir
[INTACT] 'log' child= 12204268 parent= 210329845 type=dir
[INTACT] 'log' child= 14955773 parent= 210360481 type=dir
[INTACT] 'archives' child= 14953284 parent= 210360483 type=dir
[INTACT] 'mysql' child= 4458148 parent= 210467478 type=dir
[INTACT] 'pterodactyl' child= 4459214 parent= 210493665 type=dir
[INTACT] 'docker' child= 4459256 parent= 210493665 type=dir
[INTACT] 'mysql' child= 4458283 parent= 210493665 type=dir
[INTACT] 'apache2' child= 4458419 parent= 210493665 type=dir
[INTACT] 'mysql' child= 49414148 parent= 210523963 type=dir
[INTACT] 'log' child= 19936354 parent= 210594543 type=dir
[INTACT] 'archives' child= 15368106 parent= 210596329 type=dir
[INTACT] 'archives' child= 16001027 parent= 210609986 type=dir
[INTACT] 'log' child= 15082000 parent= 210615907 type=dir
[INTACT] 'log' child= 17055930 parent= 210685762 type=dir
[INTACT] 'archives' child= 14821978 parent= 210685769 type=dir
[INTACT] 'apache2' child= 15732946 parent= 210686638 type=dir
[INTACT] 'archives' child= 14967386 parent= 210699927 type=dir
[INTACT] 'mysql' child= 13373267 parent= 212052530 type=dir
Group 26000/35728...
[INTACT] 'docker' child= 3410441 parent= 218089768 type=file
[INTACT] 'log' child= 3415134 parent= 218093944 type=dir
[INTACT] 'log' child= 3415175 parent= 218094502 type=dir
[INTACT] 'log' child= 3430467 parent= 218107906 type=dir
Group 27000/35728...
[INTACT] 'log' child= 3539730 parent= 226476593 type=dir
[INTACT] 'log' child= 3541853 parent= 226477131 type=dir
[INTACT] 'archives' child= 3540270 parent= 226477133 type=dir
[INTACT] 'log' child= 3540439 parent= 226477266 type=dir
[INTACT] 'archives' child= 3540405 parent= 226477268 type=dir
[INTACT] 'log' child= 3542407 parent= 226479225 type=dir
[INTACT] 'archives' child= 3542364 parent= 226479227 type=dir
[INTACT] 'log' child= 3543789 parent= 226480565 type=dir
[INTACT] 'mysql' child= 3544957 parent= 226481649 type=dir
Group 28000/35728...
[INTACT] 'log' child= 3671475 parent= 234865777 type=dir
[INTACT] 'archives' child= 3671380 parent= 234865779 type=dir
[INTACT] 'log' child= 3672438 parent= 234866814 type=dir
[INTACT] 'archives' child= 3672417 parent= 234866816 type=dir
Group 29000/35728...
[INTACT] 'mysql' child= 3802782 parent= 243254702 type=dir
[INTACT] 'log' child= 3822192 parent= 243271285 type=dir
Group 30000/35728...
[INTACT] 'log' child= 3942248 parent= 251649384 type=dir
[INTACT] 'mysql' child= 3946290 parent= 251655740 type=dir
[INTACT] 'docker' child= 3947763 parent= 251657234 type=file
[INTACT] 'log' child= 3952497 parent= 251661968 type=dir
[INTACT] 'mysql' child= 3955138 parent= 251664296 type=dir
[INTACT] 'mysql' child= 3961890 parent= 251668087 type=link
[INTACT] 'mysql' child= 3961819 parent= 251671257 type=dir
[INTACT] 'log' child= 3964640 parent= 251673996 type=dir
[INTACT] 'archives' child= 3964527 parent= 251673998 type=dir
Group 31000/35728...
[INTACT] 'archives' child= 4063735 parent= 260030742 type=dir
Group 32000/35728...
[INTACT] 'mysql' child= 4194328 parent= 268418850 type=dir
Group 33000/35728...
[INTACT] 'log' child= 4335460 parent= 276815206 type=dir
[INTACT] 'mysql' child= 4339502 parent= 276821560 type=dir
[INTACT] 'docker' child= 4341215 parent= 276823294 type=file
[INTACT] 'log' child= 4345359 parent= 276826952 type=dir
[INTACT] 'log' child= 4345911 parent= 276827945 type=dir
[INTACT] 'archives' child= 4345868 parent= 276827947 type=dir
[INTACT] 'log' child= 4347291 parent= 276829282 type=dir
[INTACT] 'log' child= 4348236 parent= 276830303 type=dir
Group 34000/35728...
[INTACT] 'apache2' child= 4458777 parent= 285198295 type=dir
[INTACT] 'apache2' child= 4459043 parent= 285198521 type=dir
[INTACT] 'log' child= 4468198 parent= 285205428 type=dir
[INTACT] 'mysql' child= 4472275 parent= 285211867 type=dir
[INTACT] 'docker' child= 4485280 parent= 285224895 type=file
[INTACT] 'log' child= 4587550 parent= 285229071 type=dir
Group 35000/35728...
=== SUMMARY ===
[INTACT] 'apache2' child=1441863 parent=92258147 type=dir
[INTACT] 'apache2' child=1572931 parent=100646751 type=dir
[INTACT] 'apache2' child=4458419 parent=210493665 type=dir
[INTACT] 'apache2' child=4458777 parent=285198295 type=dir
[INTACT] 'apache2' child=4459043 parent=285198521 type=dir
[INTACT] 'apache2' child=15732946 parent=210686638 type=dir
[INTACT] 'archives' child=1590964 parent=100664787 type=dir
[INTACT] 'archives' child=1713111 parent=109044470 type=dir
[INTACT] 'archives' child=1976627 parent=125823058 type=dir
[INTACT] 'archives' child=2371391 parent=150990430 type=dir
[INTACT] 'archives' child=2498562 parent=159375137 type=dir
[INTACT] 'archives' child=2502597 parent=159379172 type=dir
[INTACT] 'archives' child=3026079 parent=192932798 type=dir
[INTACT] 'archives' child=3148850 parent=201313105 type=dir
[INTACT] 'archives' child=3160137 parent=201324392 type=dir
[INTACT] 'archives' child=3540270 parent=226477133 type=dir
[INTACT] 'archives' child=3540405 parent=226477268 type=dir
[INTACT] 'archives' child=3542364 parent=226479227 type=dir
[INTACT] 'archives' child=3671380 parent=234865779 type=dir
[INTACT] 'archives' child=3672417 parent=234866816 type=dir
[INTACT] 'archives' child=3964527 parent=251673998 type=dir
[INTACT] 'archives' child=4063735 parent=260030742 type=dir
[INTACT] 'archives' child=4345868 parent=276827947 type=dir
[INTACT] 'archives' child=14821978 parent=210685769 type=dir
[INTACT] 'archives' child=14953284 parent=210360483 type=dir
[INTACT] 'archives' child=14967386 parent=210699927 type=dir
[INTACT] 'archives' child=15368106 parent=210596329 type=dir
[INTACT] 'archives' child=16001027 parent=210609986 type=dir
[INTACT] 'docker' child=1704222 parent=109035581 type=file
[INTACT] 'docker' child=3026262 parent=192932981 type=file
[INTACT] 'docker' child=3410441 parent=218089768 type=file
[INTACT] 'docker' child=3947763 parent=251657234 type=file
[INTACT] 'docker' child=4341215 parent=276823294 type=file
[INTACT] 'docker' child=4459256 parent=210493665 type=dir
[INTACT] 'docker' child=4485280 parent=285224895 type=file
[INTACT] 'log' child=1590880 parent=100664187 type=dir
[INTACT] 'log' child=1590928 parent=100664751 type=dir
[INTACT] 'log' child=1703998 parent=100678162 type=dir
[INTACT] 'log' child=1708761 parent=109039668 type=dir
[INTACT] 'log' child=1713577 parent=109044468 type=dir
[INTACT] 'log' child=1991889 parent=125836432 type=dir
[INTACT] 'log' child=2097588 parent=134201370 type=dir
[INTACT] 'log' child=2371446 parent=150990428 type=dir
[INTACT] 'log' child=2372407 parent=150991423 type=dir
[INTACT] 'log' child=2498053 parent=159374142 type=dir
[INTACT] 'log' child=2498610 parent=159375135 type=dir
[INTACT] 'log' child=2502619 parent=159379170 type=dir
[INTACT] 'log' child=3026191 parent=192932796 type=dir
[INTACT] 'log' child=3026239 parent=192932938 type=dir
[INTACT] 'log' child=3032931 parent=192939639 type=dir
[INTACT] 'log' child=3149355 parent=201313102 type=dir
[INTACT] 'log' child=3151959 parent=201315933 type=dir
[INTACT] 'log' child=3156862 parent=201320877 type=dir
[INTACT] 'log' child=3160172 parent=201324390 type=dir
[INTACT] 'log' child=3164477 parent=201328491 type=dir
[INTACT] 'log' child=3415134 parent=218093944 type=dir
[INTACT] 'log' child=3415175 parent=218094502 type=dir
[INTACT] 'log' child=3430467 parent=218107906 type=dir
[INTACT] 'log' child=3539730 parent=226476593 type=dir
[INTACT] 'log' child=3540439 parent=226477266 type=dir
[INTACT] 'log' child=3541853 parent=226477131 type=dir
[INTACT] 'log' child=3542407 parent=226479225 type=dir
[INTACT] 'log' child=3543789 parent=226480565 type=dir
[INTACT] 'log' child=3671475 parent=234865777 type=dir
[INTACT] 'log' child=3672438 parent=234866814 type=dir
[INTACT] 'log' child=3822192 parent=243271285 type=dir
[INTACT] 'log' child=3942248 parent=251649384 type=dir
[INTACT] 'log' child=3952497 parent=251661968 type=dir
[INTACT] 'log' child=3964640 parent=251673996 type=dir
[INTACT] 'log' child=4335460 parent=276815206 type=dir
[INTACT] 'log' child=4345359 parent=276826952 type=dir
[INTACT] 'log' child=4345911 parent=276827945 type=dir
[INTACT] 'log' child=4347291 parent=276829282 type=dir
[INTACT] 'log' child=4348236 parent=276830303 type=dir
[INTACT] 'log' child=4468198 parent=285205428 type=dir
[INTACT] 'log' child=4473829 parent=100648635 type=dir
[INTACT] 'log' child=4587550 parent=285229071 type=dir
[INTACT] 'log' child=9044379 parent=210223241 type=dir
[INTACT] 'log' child=12204268 parent=210329845 type=dir
[INTACT] 'log' child=14955773 parent=210360481 type=dir
[INTACT] 'log' child=15082000 parent=210615907 type=dir
[INTACT] 'log' child=15349367 parent=210324339 type=dir
[INTACT] 'log' child=17055930 parent=210685762 type=dir
[INTACT] 'log' child=19936354 parent=210594543 type=dir
[INTACT] 'mysql' child=1315529 parent=83874094 type=dir
[INTACT] 'mysql' child=1591190 parent=100664840 type=dir
[INTACT] 'mysql' child=1593996 parent=100667804 type=dir
[INTACT] 'mysql' child=1594026 parent=100666127 type=link
[INTACT] 'mysql' child=1976852 parent=125823110 type=dir
[INTACT] 'mysql' child=1979688 parent=125824397 type=link
[INTACT] 'mysql' child=2385299 parent=151004317 type=dir
[INTACT] 'mysql' child=2754290 parent=176145895 type=dir
[INTACT] 'mysql' child=3164741 parent=201328825 type=dir
[INTACT] 'mysql' child=3280643 parent=209702403 type=dir
[INTACT] 'mysql' child=3281131 parent=209702916 type=dir
[INTACT] 'mysql' child=3544957 parent=226481649 type=dir
[INTACT] 'mysql' child=3802782 parent=243254702 type=dir
[INTACT] 'mysql' child=3946290 parent=251655740 type=dir
[INTACT] 'mysql' child=3955138 parent=251664296 type=dir
[INTACT] 'mysql' child=3961819 parent=251671257 type=dir
[INTACT] 'mysql' child=3961890 parent=251668087 type=link
[INTACT] 'mysql' child=4194328 parent=268418850 type=dir
[INTACT] 'mysql' child=4339502 parent=276821560 type=dir
[INTACT] 'mysql' child=4458148 parent=210467478 type=dir
[INTACT] 'mysql' child=4458283 parent=210493665 type=dir
[INTACT] 'mysql' child=4472275 parent=285211867 type=dir
[INTACT] 'mysql' child=9046451 parent=210222501 type=dir
[INTACT] 'mysql' child=13373267 parent=212052530 type=dir
[INTACT] 'mysql' child=49414148 parent=210523963 type=dir
[INTACT] 'pterodactyl' child=1574102 parent=100647925 type=dir
[INTACT] 'pterodactyl' child=2502697 parent=159379272 type=dir
[INTACT] 'pterodactyl' child=2502709 parent=159379273 type=link
[INTACT] 'pterodactyl' child=4459214 parent=210493665 type=dir

268
test/inode.list2.txt Normal file
View File

@@ -0,0 +1,268 @@
Device: /dev/nbd0
Scanning groups 13 to 35727...
Group 1000/35728...
Group 2000/35728...
Group 3000/35728...
Group 4000/35728...
Group 5000/35728...
Group 6000/35728...
Group 7000/35728...
Group 8000/35728...
Group 9000/35728...
Group 10000/35728...
[INTACT] 'mysql' child= 1315529 parent= 83874094 type=dir
Group 11000/35728...
[INTACT] 'apache2' child= 1441863 parent= 92258147 type=dir
Group 12000/35728...
[INTACT] 'apache2' child= 1572931 parent= 100646751 type=dir
[INTACT] 'pterodactyl' child= 1574102 parent= 100647925 type=dir
[INTACT] 'log' child= 4473829 parent= 100648635 type=dir
[INTACT] 'log' child= 1590880 parent= 100664187 type=dir
[INTACT] 'log' child= 1590928 parent= 100664751 type=dir
[INTACT] 'archives' child= 1590964 parent= 100664787 type=dir
[INTACT] 'mysql' child= 1591190 parent= 100664840 type=dir
[INTACT] 'mysql' child= 1594026 parent= 100666127 type=link
[INTACT] 'mysql' child= 1593996 parent= 100667804 type=dir
[INTACT] 'log' child= 1703998 parent= 100678162 type=dir
Group 13000/35728...
[INTACT] 'docker' child= 1704222 parent= 109035581 type=file
[INTACT] 'log' child= 1708761 parent= 109039668 type=dir
[INTACT] 'log' child= 1713577 parent= 109044468 type=dir
[INTACT] 'archives' child= 1713111 parent= 109044470 type=dir
Group 14000/35728...
Group 15000/35728...
[INTACT] 'archives' child= 1976627 parent= 125823058 type=dir
[INTACT] 'mysql' child= 1976852 parent= 125823110 type=dir
[INTACT] 'mysql' child= 1979688 parent= 125824397 type=link
[INTACT] 'log' child= 1991889 parent= 125836432 type=dir
Group 16000/35728...
[INTACT] 'log' child= 2097588 parent= 134201370 type=dir
Group 17000/35728...
Group 18000/35728...
[INTACT] 'log' child= 2371446 parent= 150990428 type=dir
[INTACT] 'archives' child= 2371391 parent= 150990430 type=dir
[INTACT] 'log' child= 2372407 parent= 150991423 type=dir
[INTACT] 'mysql' child= 2385299 parent= 151004317 type=dir
Group 19000/35728...
[INTACT] 'log' child= 2498053 parent= 159374142 type=dir
[INTACT] 'log' child= 2498610 parent= 159375135 type=dir
[INTACT] 'archives' child= 2498562 parent= 159375137 type=dir
[INTACT] 'log' child= 2502619 parent= 159379170 type=dir
[INTACT] 'archives' child= 2502597 parent= 159379172 type=dir
[INTACT] 'pterodactyl' child= 2502697 parent= 159379272 type=dir
[INTACT] 'pterodactyl' child= 2502709 parent= 159379273 type=link
Group 20000/35728...
Group 21000/35728...
[INTACT] 'mysql' child= 2754290 parent= 176145895 type=dir
Group 22000/35728...
Group 23000/35728...
[INTACT] 'log' child= 3026191 parent= 192932796 type=dir
[INTACT] 'archives' child= 3026079 parent= 192932798 type=dir
[INTACT] 'log' child= 3026239 parent= 192932938 type=dir
[INTACT] 'docker' child= 3026262 parent= 192932981 type=file
[INTACT] 'log' child= 3032931 parent= 192939639 type=dir
Group 24000/35728...
[INTACT] 'log' child= 3149355 parent= 201313102 type=dir
[INTACT] 'archives' child= 3148850 parent= 201313105 type=dir
[INTACT] 'log' child= 3151959 parent= 201315933 type=dir
[INTACT] 'log' child= 3156862 parent= 201320877 type=dir
[INTACT] 'log' child= 3160172 parent= 201324390 type=dir
[INTACT] 'archives' child= 3160137 parent= 201324392 type=dir
[INTACT] 'log' child= 3164477 parent= 201328491 type=dir
[INTACT] 'mysql' child= 3164741 parent= 201328825 type=dir
Group 25000/35728...
[INTACT] 'mysql' child= 3280643 parent= 209702403 type=dir
[INTACT] 'mysql' child= 3281131 parent= 209702916 type=dir
[INTACT] 'mysql' child= 9046451 parent= 210222501 type=dir
[INTACT] 'log' child= 9044379 parent= 210223241 type=dir
[INTACT] 'commons-codec' child= 17313577 parent= 210279436 type=dir
[INTACT] 'log' child= 15349367 parent= 210324339 type=dir
[INTACT] 'log' child= 12204268 parent= 210329845 type=dir
[INTACT] 'log' child= 14955773 parent= 210360481 type=dir
[INTACT] 'archives' child= 14953284 parent= 210360483 type=dir
[INTACT] 'mysql' child= 4458148 parent= 210467478 type=dir
[INTACT] 'pterodactyl' child= 4459214 parent= 210493665 type=dir
[INTACT] 'docker' child= 4459256 parent= 210493665 type=dir
[INTACT] 'mysql' child= 4458283 parent= 210493665 type=dir
[INTACT] 'apache2' child= 4458419 parent= 210493665 type=dir
[INTACT] 'mysql' child= 49414148 parent= 210523963 type=dir
[INTACT] 'log' child= 19936354 parent= 210594543 type=dir
[INTACT] 'archives' child= 15368106 parent= 210596329 type=dir
[INTACT] 'archives' child= 16001027 parent= 210609986 type=dir
[INTACT] 'log' child= 15082000 parent= 210615907 type=dir
[INTACT] 'log' child= 17055930 parent= 210685762 type=dir
[INTACT] 'archives' child= 14821978 parent= 210685769 type=dir
[INTACT] 'apache2' child= 15732946 parent= 210686638 type=dir
[INTACT] 'archives' child= 14967386 parent= 210699927 type=dir
[INTACT] 'mysql' child= 13373267 parent= 212052530 type=dir
Group 26000/35728...
[INTACT] 'docker' child= 3410441 parent= 218089768 type=file
[INTACT] 'log' child= 3415134 parent= 218093944 type=dir
[INTACT] 'log' child= 3415175 parent= 218094502 type=dir
[INTACT] 'log' child= 3430467 parent= 218107906 type=dir
Group 27000/35728...
[INTACT] 'log' child= 3539730 parent= 226476593 type=dir
[INTACT] 'log' child= 3541853 parent= 226477131 type=dir
[INTACT] 'archives' child= 3540270 parent= 226477133 type=dir
[INTACT] 'log' child= 3540439 parent= 226477266 type=dir
[INTACT] 'archives' child= 3540405 parent= 226477268 type=dir
[INTACT] 'log' child= 3542407 parent= 226479225 type=dir
[INTACT] 'archives' child= 3542364 parent= 226479227 type=dir
[INTACT] 'log' child= 3543789 parent= 226480565 type=dir
[INTACT] 'mysql' child= 3544957 parent= 226481649 type=dir
Group 28000/35728...
[INTACT] 'log' child= 3671475 parent= 234865777 type=dir
[INTACT] 'archives' child= 3671380 parent= 234865779 type=dir
[INTACT] 'log' child= 3672438 parent= 234866814 type=dir
[INTACT] 'archives' child= 3672417 parent= 234866816 type=dir
Group 29000/35728...
[INTACT] 'mysql' child= 3802782 parent= 243254702 type=dir
[INTACT] 'log' child= 3822192 parent= 243271285 type=dir
Group 30000/35728...
[INTACT] 'log' child= 3942248 parent= 251649384 type=dir
[INTACT] 'mysql' child= 3946290 parent= 251655740 type=dir
[INTACT] 'docker' child= 3947763 parent= 251657234 type=file
[INTACT] 'log' child= 3952497 parent= 251661968 type=dir
[INTACT] 'mysql' child= 3955138 parent= 251664296 type=dir
[INTACT] 'mysql' child= 3961890 parent= 251668087 type=link
[INTACT] 'mysql' child= 3961819 parent= 251671257 type=dir
[INTACT] 'log' child= 3964640 parent= 251673996 type=dir
[INTACT] 'archives' child= 3964527 parent= 251673998 type=dir
Group 31000/35728...
[INTACT] 'archives' child= 4063735 parent= 260030742 type=dir
Group 32000/35728...
[INTACT] 'mysql' child= 4194328 parent= 268418850 type=dir
Group 33000/35728...
[INTACT] 'log' child= 4335460 parent= 276815206 type=dir
[INTACT] 'mysql' child= 4339502 parent= 276821560 type=dir
[INTACT] 'docker' child= 4341215 parent= 276823294 type=file
[INTACT] 'log' child= 4345359 parent= 276826952 type=dir
[INTACT] 'log' child= 4345911 parent= 276827945 type=dir
[INTACT] 'archives' child= 4345868 parent= 276827947 type=dir
[INTACT] 'log' child= 4347291 parent= 276829282 type=dir
[INTACT] 'log' child= 4348236 parent= 276830303 type=dir
Group 34000/35728...
[INTACT] 'apache2' child= 4458777 parent= 285198295 type=dir
[INTACT] 'apache2' child= 4459043 parent= 285198521 type=dir
[INTACT] 'log' child= 4468198 parent= 285205428 type=dir
[INTACT] 'mysql' child= 4472275 parent= 285211867 type=dir
[INTACT] 'docker' child= 4485280 parent= 285224895 type=file
[INTACT] 'log' child= 4587550 parent= 285229071 type=dir
Group 35000/35728...
=== SUMMARY ===
[INTACT] 'apache2' child=1441863 parent=92258147 type=dir
[INTACT] 'apache2' child=1572931 parent=100646751 type=dir
[INTACT] 'apache2' child=4458419 parent=210493665 type=dir
[INTACT] 'apache2' child=4458777 parent=285198295 type=dir
[INTACT] 'apache2' child=4459043 parent=285198521 type=dir
[INTACT] 'apache2' child=15732946 parent=210686638 type=dir
[INTACT] 'archives' child=1590964 parent=100664787 type=dir
[INTACT] 'archives' child=1713111 parent=109044470 type=dir
[INTACT] 'archives' child=1976627 parent=125823058 type=dir
[INTACT] 'archives' child=2371391 parent=150990430 type=dir
[INTACT] 'archives' child=2498562 parent=159375137 type=dir
[INTACT] 'archives' child=2502597 parent=159379172 type=dir
[INTACT] 'archives' child=3026079 parent=192932798 type=dir
[INTACT] 'archives' child=3148850 parent=201313105 type=dir
[INTACT] 'archives' child=3160137 parent=201324392 type=dir
[INTACT] 'archives' child=3540270 parent=226477133 type=dir
[INTACT] 'archives' child=3540405 parent=226477268 type=dir
[INTACT] 'archives' child=3542364 parent=226479227 type=dir
[INTACT] 'archives' child=3671380 parent=234865779 type=dir
[INTACT] 'archives' child=3672417 parent=234866816 type=dir
[INTACT] 'archives' child=3964527 parent=251673998 type=dir
[INTACT] 'archives' child=4063735 parent=260030742 type=dir
[INTACT] 'archives' child=4345868 parent=276827947 type=dir
[INTACT] 'archives' child=14821978 parent=210685769 type=dir
[INTACT] 'archives' child=14953284 parent=210360483 type=dir
[INTACT] 'archives' child=14967386 parent=210699927 type=dir
[INTACT] 'archives' child=15368106 parent=210596329 type=dir
[INTACT] 'archives' child=16001027 parent=210609986 type=dir
[INTACT] 'commons-codec' child=17313577 parent=210279436 type=dir
[INTACT] 'docker' child=1704222 parent=109035581 type=file
[INTACT] 'docker' child=3026262 parent=192932981 type=file
[INTACT] 'docker' child=3410441 parent=218089768 type=file
[INTACT] 'docker' child=3947763 parent=251657234 type=file
[INTACT] 'docker' child=4341215 parent=276823294 type=file
[INTACT] 'docker' child=4459256 parent=210493665 type=dir
[INTACT] 'docker' child=4485280 parent=285224895 type=file
[INTACT] 'log' child=1590880 parent=100664187 type=dir
[INTACT] 'log' child=1590928 parent=100664751 type=dir
[INTACT] 'log' child=1703998 parent=100678162 type=dir
[INTACT] 'log' child=1708761 parent=109039668 type=dir
[INTACT] 'log' child=1713577 parent=109044468 type=dir
[INTACT] 'log' child=1991889 parent=125836432 type=dir
[INTACT] 'log' child=2097588 parent=134201370 type=dir
[INTACT] 'log' child=2371446 parent=150990428 type=dir
[INTACT] 'log' child=2372407 parent=150991423 type=dir
[INTACT] 'log' child=2498053 parent=159374142 type=dir
[INTACT] 'log' child=2498610 parent=159375135 type=dir
[INTACT] 'log' child=2502619 parent=159379170 type=dir
[INTACT] 'log' child=3026191 parent=192932796 type=dir
[INTACT] 'log' child=3026239 parent=192932938 type=dir
[INTACT] 'log' child=3032931 parent=192939639 type=dir
[INTACT] 'log' child=3149355 parent=201313102 type=dir
[INTACT] 'log' child=3151959 parent=201315933 type=dir
[INTACT] 'log' child=3156862 parent=201320877 type=dir
[INTACT] 'log' child=3160172 parent=201324390 type=dir
[INTACT] 'log' child=3164477 parent=201328491 type=dir
[INTACT] 'log' child=3415134 parent=218093944 type=dir
[INTACT] 'log' child=3415175 parent=218094502 type=dir
[INTACT] 'log' child=3430467 parent=218107906 type=dir
[INTACT] 'log' child=3539730 parent=226476593 type=dir
[INTACT] 'log' child=3540439 parent=226477266 type=dir
[INTACT] 'log' child=3541853 parent=226477131 type=dir
[INTACT] 'log' child=3542407 parent=226479225 type=dir
[INTACT] 'log' child=3543789 parent=226480565 type=dir
[INTACT] 'log' child=3671475 parent=234865777 type=dir
[INTACT] 'log' child=3672438 parent=234866814 type=dir
[INTACT] 'log' child=3822192 parent=243271285 type=dir
[INTACT] 'log' child=3942248 parent=251649384 type=dir
[INTACT] 'log' child=3952497 parent=251661968 type=dir
[INTACT] 'log' child=3964640 parent=251673996 type=dir
[INTACT] 'log' child=4335460 parent=276815206 type=dir
[INTACT] 'log' child=4345359 parent=276826952 type=dir
[INTACT] 'log' child=4345911 parent=276827945 type=dir
[INTACT] 'log' child=4347291 parent=276829282 type=dir
[INTACT] 'log' child=4348236 parent=276830303 type=dir
[INTACT] 'log' child=4468198 parent=285205428 type=dir
[INTACT] 'log' child=4473829 parent=100648635 type=dir
[INTACT] 'log' child=4587550 parent=285229071 type=dir
[INTACT] 'log' child=9044379 parent=210223241 type=dir
[INTACT] 'log' child=12204268 parent=210329845 type=dir
[INTACT] 'log' child=14955773 parent=210360481 type=dir
[INTACT] 'log' child=15082000 parent=210615907 type=dir
[INTACT] 'log' child=15349367 parent=210324339 type=dir
[INTACT] 'log' child=17055930 parent=210685762 type=dir
[INTACT] 'log' child=19936354 parent=210594543 type=dir
[INTACT] 'mysql' child=1315529 parent=83874094 type=dir
[INTACT] 'mysql' child=1591190 parent=100664840 type=dir
[INTACT] 'mysql' child=1593996 parent=100667804 type=dir
[INTACT] 'mysql' child=1594026 parent=100666127 type=link
[INTACT] 'mysql' child=1976852 parent=125823110 type=dir
[INTACT] 'mysql' child=1979688 parent=125824397 type=link
[INTACT] 'mysql' child=2385299 parent=151004317 type=dir
[INTACT] 'mysql' child=2754290 parent=176145895 type=dir
[INTACT] 'mysql' child=3164741 parent=201328825 type=dir
[INTACT] 'mysql' child=3280643 parent=209702403 type=dir
[INTACT] 'mysql' child=3281131 parent=209702916 type=dir
[INTACT] 'mysql' child=3544957 parent=226481649 type=dir
[INTACT] 'mysql' child=3802782 parent=243254702 type=dir
[INTACT] 'mysql' child=3946290 parent=251655740 type=dir
[INTACT] 'mysql' child=3955138 parent=251664296 type=dir
[INTACT] 'mysql' child=3961819 parent=251671257 type=dir
[INTACT] 'mysql' child=3961890 parent=251668087 type=link
[INTACT] 'mysql' child=4194328 parent=268418850 type=dir
[INTACT] 'mysql' child=4339502 parent=276821560 type=dir
[INTACT] 'mysql' child=4458148 parent=210467478 type=dir
[INTACT] 'mysql' child=4458283 parent=210493665 type=dir
[INTACT] 'mysql' child=4472275 parent=285211867 type=dir
[INTACT] 'mysql' child=9046451 parent=210222501 type=dir
[INTACT] 'mysql' child=13373267 parent=212052530 type=dir
[INTACT] 'mysql' child=49414148 parent=210523963 type=dir
[INTACT] 'pterodactyl' child=1574102 parent=100647925 type=dir
[INTACT] 'pterodactyl' child=2502697 parent=159379272 type=dir
[INTACT] 'pterodactyl' child=2502709 parent=159379273 type=link
[INTACT] 'pterodactyl' child=4459214 parent=210493665 type=dir

88
test/inspect.py Normal file
View File

@@ -0,0 +1,88 @@
import struct

# Layout constants for the ext4 filesystem exposed on /dev/nbd0.
CHUNK = 128 * 512         # 64 KiB chunk size; not used in the code below
                          # (presumably retained from the md0 remapping scripts)
LV_START = 5120000 * 512  # LV data start in bytes; also unused below
BSIZE = 4096              # ext4 block size in bytes
IPG = 8192                # inodes per block group
INODE_SIZE = 256          # size of one on-disk inode record in bytes
def read_virt(virt_offset, length, device='/dev/nbd0'):
    """Read ``length`` bytes at absolute byte offset ``virt_offset``.

    Generalized so the source device/file is a parameter (defaulting to
    the original hard-coded ``/dev/nbd0``), which keeps existing callers
    working while making the routine reusable and testable.

    Reads in 64 KiB slices.  If the device ends before ``length`` bytes
    are available, the remainder of the buffer is left as zeros, so the
    result is always exactly ``length`` bytes long.

    Args:
        virt_offset: Absolute byte offset to start reading at.
        length: Number of bytes to return.
        device: Path of the block device or file to read from.

    Returns:
        ``bytes`` of exactly ``length`` bytes (zero-padded past EOF).
    """
    result = bytearray(length)
    pos = virt_offset
    remaining = length
    with open(device, 'rb') as f:
        while remaining > 0:
            f.seek(pos)
            # Cap each read at 64 KiB to bound per-call memory.
            chunk = f.read(min(remaining, 65536))
            if not chunk:
                break  # EOF: leave the tail of the buffer zeroed
            dst = pos - virt_offset
            result[dst:dst + len(chunk)] = chunk
            pos += len(chunk)
            remaining -= len(chunk)
    return bytes(result)
def get_inode_table_block(group):
    """Return the starting block of *group*'s inode table.

    The 64-byte group descriptor is read from the backup GDT stored in
    block group 1 (one block past group 1's superblock copy), and the
    lo/hi halves of bg_inode_table are combined into one block number.
    """
    # Backup GDT location: block 32768 holds group 1's superblock copy,
    # the descriptor table starts in the block right after it.
    backup_gdt = (32768 + 1) * BSIZE
    desc = read_virt(backup_gdt + 64 * group, 64)
    lo, = struct.unpack_from('<I', desc, 8)
    hi, = struct.unpack_from('<I', desc, 40)
    return lo | (hi << 32)
def read_inode(inode_num):
    """Return the raw 256-byte on-disk inode for 1-based *inode_num*."""
    group, slot = divmod(inode_num - 1, IPG)
    table_block = get_inode_table_block(group)
    return read_virt(table_block * BSIZE + slot * INODE_SIZE, INODE_SIZE)
def read_extents(inode_data):
    """Return the physical block numbers covered by the inode's extents.

    Only depth-0 extent trees stored inline in i_block are handled (at
    most 4 extents).  Returns an empty list when the extent magic is
    missing or the tree has internal nodes.
    """
    # Extent header lives at byte 40 of the inode: magic, entries, max, depth.
    magic, entry_count, _max_entries, depth = struct.unpack_from('<4H', inode_data, 40)
    if magic != 0xf30a or depth != 0:
        return []
    blocks = []
    for i in range(min(entry_count, 4)):
        base = 52 + 12 * i
        ext_len, start_hi = struct.unpack_from('<HH', inode_data, base + 4)
        start_lo, = struct.unpack_from('<I', inode_data, base + 8)
        first = (start_hi << 32) | start_lo
        blocks.extend(range(first, first + ext_len))
    return blocks
def list_dir(inode_num):
    """Print a decoded directory listing for *inode_num*.

    Reads the inode, walks its inline extents, parses every ext4
    directory entry found in those blocks, and prints one line per
    entry, sorted by (name, inode, type, group).
    NOTE(review): entries are tagged INTACT when their inode's block
    group is >= 13, LOST otherwise -- the threshold matches the first
    scanned group in the recovery logs; confirm before reusing.
    """
    raw = read_inode(inode_num)
    mode, = struct.unpack_from('<H', raw, 0)
    size, = struct.unpack_from('<I', raw, 4)   # i_size_lo only
    links, = struct.unpack_from('<H', raw, 26)
    print(f'Inode {inode_num}: mode=0x{mode:04x} size={size} links={links}')
    found = []
    for blk in read_extents(raw):
        block = read_virt(blk * BSIZE, BSIZE)
        cursor = 0
        while cursor < BSIZE - 8:
            # Dirent layout: inode (u32), rec_len (u16), name_len, file_type.
            ino, rec_len = struct.unpack_from('<IH', block, cursor)
            name_len = block[cursor + 6]
            ftype = block[cursor + 7]
            if rec_len < 8:
                break  # malformed entry: stop parsing this block
            if ino and name_len:
                text = block[cursor + 8:cursor + 8 + name_len].decode('utf-8', errors='replace')
                found.append((text, ino, ftype, (ino - 1) // IPG))
            cursor += rec_len
    kinds = {1: 'file', 2: 'dir', 7: 'symlink'}
    print(f'Directory entries ({len(found)}):')
    for text, ino, ftype, grp in sorted(found):
        flag = 'INTACT' if grp >= 13 else 'LOST'
        print(f' [{flag}] {kinds.get(ftype, str(ftype)):6s} inode={ino:10d} group={grp:6d} {text!r}')
# Read the volumes directory
# Script entry point: dump the directory listing for a hard-coded inode.
# NOTE(review): 1585918 is presumably the 'volumes' directory found by an
# earlier scan -- confirm against the dirent scan output before reusing.
list_dir(1585918)

21
test/jj.sh Normal file
View File

@@ -0,0 +1,21 @@
# Probe the PERC controller's metadata chunks on /dev/sda: dump the first
# 512 bytes of a few chunk-index-4 positions to confirm they hold non-zero
# controller metadata rather than filesystem data.
python3 -c "
# Read actual PERC metadata chunks - these are NOT zeros
# They're at every 5th chunk position on each physical disk
# For disk 0 (sda), metadata chunks are at:
#   phys_byte = data_offset + group*5*CHUNK + 4*CHUNK
#   i.e., chunk positions 4, 9, 14, 19... of each disk
CHUNK = 128*512  # 64KB
LV_START = 5120000*512
# Read first few metadata chunks from sda
with open('/dev/sda','rb') as f:
    for chunk_num in [4, 9, 14, 19, 24]:
        phys = LV_START + chunk_num * CHUNK
        f.seek(phys)
        # Only the first sector of each chunk is sampled.
        data = f.read(512)
        nonzero = sum(1 for b in data if b != 0)
        print(f'sda metadata chunk {chunk_num}: '
              f'phys={phys} nonzero={nonzero}/512 '
              f'first8={data[:8].hex()}')
"

57
test/k.sh Normal file
View File

@@ -0,0 +1,57 @@
# Dump the ext4 primary superblock of /dev/nbd0 (1024 bytes at offset 1024):
# print the key raw fields, then recompute derived geometry (block size,
# group count, GDT size) from them.
python3 -c "
import struct
with open('/dev/nbd0','rb') as f:
    # Primary superblock sits 1024 bytes into the filesystem.
    f.seek(1024)
    sb = f.read(1024)
# Print all key superblock fields
fields = [
    ('inodes_count', 0, 'I'),
    ('blocks_count_lo', 4, 'I'),
    ('free_blocks_lo', 12, 'I'),
    ('free_inodes', 16, 'I'),
    ('first_data_block', 20, 'I'),
    ('log_block_size', 24, 'I'),  # bsize = 1024 << log_block_size
    ('blocks_per_group', 32, 'I'),  # NOTE: offset 32 not 40
    ('inodes_per_group', 40, 'I'),  # NOTE: offset 40
    ('magic', 56, 'H'),
    ('state', 58, 'H'),
    ('inode_size', 88, 'H'),
    ('block_group_nr', 90, 'H'),
    ('feat_compat', 92, 'I'),
    ('feat_incompat', 96, 'I'),
    ('feat_ro_compat', 100, 'I'),
    # NOTE(review): in the ext4 on-disk layout s_journal_inum is at
    # offset 224, not 180 -- offset 180 falls inside s_last_mounted,
    # so this row likely prints garbage.  Verify before relying on it.
    ('journal_inum', 180, 'I'),
    ('blocks_per_group_2', 32, 'I'),
    ('desc_size', 254, 'H'),  # GDT entry size
    ('blocks_count_hi', 336, 'I'),
]
print('Superblock fields:')
for name, off, fmt in fields:
    # NOTE(review): 'size' is computed but never used.
    size = struct.calcsize('<'+fmt)
    val = struct.unpack_from('<'+fmt, sb, off)[0]
    print(f' {name:25s} @ offset {off:3d}: {val}')
# Recalculate key values
log_bsize = struct.unpack_from('<I', sb, 24)[0]
bsize = 1024 << log_bsize
bpg = struct.unpack_from('<I', sb, 32)[0]
ipg = struct.unpack_from('<I', sb, 40)[0]
desc_size = struct.unpack_from('<H', sb, 254)[0]
# 64-bit total block count: combine lo/hi halves.
total_blocks_lo = struct.unpack_from('<I', sb, 4)[0]
total_blocks_hi = struct.unpack_from('<I', sb, 336)[0]
total_blocks = (total_blocks_hi << 32) | total_blocks_lo
num_groups = (total_blocks + bpg - 1) // bpg
print()
print(f'Computed values:')
print(f' block size: {bsize}')
print(f' blocks/group: {bpg}')
print(f' inodes/group: {ipg}')
print(f' GDT entry size: {desc_size}')
print(f' total blocks: {total_blocks}')
print(f' num groups: {num_groups}')
print(f' GDT size: {num_groups * desc_size} bytes')
"

51
test/kk.sh Normal file
View File

@@ -0,0 +1,51 @@
# kk.sh — analyze the contents of the first PERC metadata chunk on
# /dev/sda: hex-dump its head, look for a repeating period, and count
# occurrences/spacing of the leading 8-byte pattern within the chunk.
python3 -c "
CHUNK = 128*512  # 64KB
LV_START = 5120000*512
with open('/dev/sda','rb') as f:
    # Read full first metadata chunk
    phys = LV_START + 4 * CHUNK
    f.seek(phys)
    data = f.read(CHUNK)
# What's in it?
import struct
# Check for recognizable patterns
print(f'First 64 bytes:')
for i in range(0, 64, 16):
    print(f' {data[i:i+16].hex()} {data[i:i+16].decode(\"latin1\",errors=\"replace\")}')
# Check if it repeats
chunk_size = len(data)
period = None
for p in [8, 16, 32, 64, 128, 256, 512]:
    if data[:p] * (chunk_size // p) == data[:chunk_size - chunk_size%p]:
        period = p
        print(f'Data repeats every {p} bytes')
        break
# Check if first 8 bytes appear elsewhere in the chunk
pattern = data[:8]
count = 0
pos = 0
while True:
    idx = data.find(pattern, pos)
    if idx < 0: break
    count += 1
    pos = idx + 1
print(f'First 8 bytes appear {count} times in chunk')
# Check spacing between repetitions
positions = []
pos = 0
while True:
    idx = data.find(pattern, pos)
    if idx < 0: break
    positions.append(idx)
    pos = idx + 1
if len(positions) > 1:
    gaps = [positions[i+1]-positions[i] for i in range(len(positions)-1)]
    print(f'Positions: {positions[:10]}')
    print(f'Gaps: {gaps[:10]}')
"

45
test/l.sh Normal file
View File

@@ -0,0 +1,45 @@
# l.sh — try five candidate crc32c formulations for the ext4 group
# descriptor checksum against the value e2fsck expects (0x03f5), to
# determine which seed/ordering ext4 actually uses.
# Requires the third-party 'crcmod' package and /tmp/merged_gdt.bin.
python3 -c "
import struct, crcmod
crc32c = crcmod.predefined.mkCrcFun('crc-32c')
with open('/dev/nbd0','rb') as f:
    f.seek(1024)
    sb = f.read(1024)
uuid = sb[104:120]
csum_seed = struct.unpack_from('<I', sb, 408)[0]
print(f'uuid: {uuid.hex()}')
print(f'csum_seed: 0x{csum_seed:08x}')
# Read current group 0 GDT entry
data = bytearray(open('/tmp/merged_gdt.bin','rb').read())
e = bytearray(data[0:64])
print(f'Group 0 entry: {e.hex()}')
print(f'Current stored csum: 0x{struct.unpack_from(\"<H\",e,30)[0]:04x}')
print(f'e2fsck says should be: 0x03f5')
# Try different computation methods
# Method 1: standard
e2 = bytearray(e); struct.pack_into('<H',e2,30,0)
c1 = crc32c(uuid + struct.pack('<H',0) + bytes(e2), csum_seed) & 0xFFFF
print(f'Method 1 (seed+uuid+grp+entry): 0x{c1:04x}')
# Method 2: no seed
c2 = crc32c(uuid + struct.pack('<H',0) + bytes(e2)) & 0xFFFF
print(f'Method 2 (no seed): 0x{c2:04x}')
# Method 3: seed only on uuid
c3 = crc32c(struct.pack('<H',0) + bytes(e2), crc32c(uuid, csum_seed)) & 0xFFFF
print(f'Method 3 (seed on uuid first): 0x{c3:04x}')
# Method 4: what e2fsck uses internally
# From e2fsprogs source: crc32c(~0, uuid, 16) then crc32c(that, grp_le16+entry)
seed = crc32c(uuid, 0xFFFFFFFF)
c4 = crc32c(struct.pack('<H',0) + bytes(e2), seed) & 0xFFFF
print(f'Method 4 (e2fsprogs source): 0x{c4:04x}')
# Method 5: using stored seed from superblock offset 408
c5 = crc32c(struct.pack('<H',0) + bytes(e2), csum_seed) & 0xFFFF
print(f'Method 5 (stored seed only): 0x{c5:04x}')
"

340
test/nbd_server_v3 Normal file
View File

@@ -0,0 +1,340 @@
#!/usr/bin/env python3
"""
NBD server v3 — newstyle protocol + PERC H710 chunk translation.
Transformations applied on every read:
1. Chunk translation — skips every 5th 64KB chunk (PERC internal metadata)
2. Superblock patch — clears metadata_csum / gdt_csum / has_journal bits
3. GDT reconstruction — synthesizes correct group descriptors for regions
that fall inside metadata chunks
Usage:
python3 nbd_server_v3.py &
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root
"""
import socket
import struct
import threading
# ── Physical layout ───────────────────────────────────────────────────────────
DEV = '/dev/md0'                       # reassembled array device we read from
CHUNK_BYTES = 128 * 512                # 64 KB PERC chunk
LV_PHYS_START = 5120000 * 512          # byte 2,621,440,000 — LV data start
VIRT_SIZE = 9365766144 * 512           # from superblock block count
# ── ext4 filesystem parameters ────────────────────────────────────────────────
BSIZE = 4096                           # filesystem block size
BPG = 32768                            # blocks per group
GDT_ENTRY = 64                         # descriptor size (64-bit feature)
NUM_GROUPS = 35728
GDT_START_VIRT = BSIZE                 # primary GDT begins at block 1
GDT_END_VIRT = BSIZE + NUM_GROUPS * GDT_ENTRY
SB_VIRT_OFFSET = 1024                  # primary superblock location
SB_SIZE = 1024
SB_INCOMPAT_OFF = 96                   # s_feature_incompat
SB_RO_COMPAT_OFF = 100                 # s_feature_ro_compat
SB_CHECKSUM_OFF = 1020                 # s_checksum
# NOTE(review): ext4's *incompat* bit 0x4 is INCOMPAT_RECOVER (journal needs
# replay); COMPAT_HAS_JOURNAL is the *compat* bit 0x4.  Clearing incompat 0x4
# disables journal recovery — probably the intended effect, but the constant
# name is misleading; confirm.
INCOMPAT_HAS_JOURNAL = 0x00000004
RO_COMPAT_METADATA_CSUM = 0x00000400
RO_COMPAT_GDT_CSUM = 0x00000010
_patched_sb = None                     # cache for the patched superblock bytes
_sb_lock = threading.Lock()            # guards one-time _patched_sb build
# ── NBD newstyle protocol constants ──────────────────────────────────────────
NBDMAGIC = 0x4e42444d41474943          # "NBDMAGIC"
IHAVEOPT = 0x49484156454F5054          # "IHAVEOPT"
REPLYMAGIC = 0x3e889045565a9           # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_ERR_UNSUP = (1 << 31) | 1
NBD_REP_ERR_POLICY = (1 << 31) | 2
NBD_FLAG_HAS_FLAGS = 1 << 0            # transmission flag
NBD_FLAG_READ_ONLY = 1 << 1            # transmission flag
NBD_FLAG_SEND_FLUSH = 1 << 2           # transmission flag
NBD_FLAG_FIXED_NEWSTYLE = 1 << 0       # handshake flag
NBD_FLAG_C_NO_ZEROES = 1 << 1          # client flag
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_WRITE = 1
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
# ── Chunk translation ─────────────────────────────────────────────────────────
def raw_read(virt_offset, length):
    """Read *length* bytes at *virt_offset* of the reconstructed volume.

    Applies the PERC chunk translation: the virtual space is the physical
    LV with every 5th 64 KB chunk (controller metadata) removed.  Bytes
    that fall inside a skipped chunk are returned as zeros.
    """
    out = bytearray(length)
    cur = virt_offset
    left = length
    with open(DEV, 'rb') as dev:
        while left > 0:
            stripe, in_stripe = divmod(cur, 5 * CHUNK_BYTES)
            chunk_no, intra = divmod(in_stripe, CHUNK_BYTES)
            take = min(CHUNK_BYTES - intra, left)
            if chunk_no != 4:  # chunk 4 of each stripe is PERC metadata
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + chunk_no) * CHUNK_BYTES
                         + intra)
                piece = dev.read(take)
                dst = cur - virt_offset
                out[dst:dst + len(piece)] = piece
            cur += take
            left -= take
    return bytes(out)
# ── GDT synthesis ─────────────────────────────────────────────────────────────
def make_gdt_entry(n):
    """Build the 64-byte group descriptor for group *n* (confirmed pattern).

    Layout: block bitmap at 1038+n, inode bitmap at 1054+n, inode table
    at 1070+512*n.  Every other field (free counts, checksum) stays zero
    because metadata_csum is cleared in the patched superblock.
    """
    desc = bytearray(64)
    struct.pack_into('<III', desc, 0, 1038 + n, 1054 + n, 1070 + n * 512)
    return bytes(desc)
def patch_gdt(data, virt_offset, length):
    """Overwrite metadata-chunk zeros within the GDT with synthesized entries.

    raw_read() leaves every 5th 64 KB chunk zero-filled (PERC metadata).
    Where such a chunk overlaps the primary GDT region, fill the hole
    with descriptors from make_gdt_entry().  *data* (a bytearray) is
    mutated in place; *virt_offset*/*length* describe the virtual span
    it covers.

    Fix vs. original: the original rebuilt the same 64-byte descriptor
    once per byte copied; this version copies per-descriptor spans and
    builds each descriptor exactly once.
    """
    pos = virt_offset
    remaining = length
    while remaining > 0:
        in_group = pos % (5 * CHUNK_BYTES)
        chunk_idx = in_group // CHUNK_BYTES
        intra = in_group % CHUNK_BYTES
        seg_len = min(CHUNK_BYTES - intra, remaining)
        seg_end = pos + seg_len
        if chunk_idx == 4:
            # metadata chunk — was zeros; patch if it overlaps the GDT
            ol_start = max(pos, GDT_START_VIRT)
            ol_end = min(seg_end, GDT_END_VIRT)
            cur = ol_start
            while cur < ol_end:
                gdt_rel = cur - GDT_START_VIRT
                grp = gdt_rel // GDT_ENTRY
                byte_in = gdt_rel % GDT_ENTRY
                # bytes of this descriptor inside the overlap
                n = min(GDT_ENTRY - byte_in, ol_end - cur)
                if grp < NUM_GROUPS:
                    entry = make_gdt_entry(grp)   # built once per descriptor
                    dst = cur - virt_offset
                    data[dst:dst + n] = entry[byte_in:byte_in + n]
                cur += n
        pos += seg_len
        remaining -= seg_len
# ── Superblock patch ──────────────────────────────────────────────────────────
def get_patched_sb():
    """Return the 1 KB primary superblock with recovery patches applied.

    Computed once and cached under _sb_lock.  Patches: clear the journal
    incompat bit and both checksum ro_compat bits, and zero the stored
    superblock checksum, so the kernel accepts the filesystem without
    journal replay or checksum verification.
    """
    global _patched_sb
    with _sb_lock:
        if _patched_sb is None:
            raw = bytearray(raw_read(SB_VIRT_OFFSET, SB_SIZE))
            inc = struct.unpack_from('<I', raw, SB_INCOMPAT_OFF)[0]
            roc = struct.unpack_from('<I', raw, SB_RO_COMPAT_OFF)[0]
            inc &= ~INCOMPAT_HAS_JOURNAL
            roc &= ~(RO_COMPAT_METADATA_CSUM | RO_COMPAT_GDT_CSUM)
            struct.pack_into('<I', raw, SB_INCOMPAT_OFF, inc)
            struct.pack_into('<I', raw, SB_RO_COMPAT_OFF, roc)
            struct.pack_into('<I', raw, SB_CHECKSUM_OFF, 0)
            _patched_sb = bytes(raw)
            print(f'[sb] patched: incompat=0x{inc:08x} ro_compat=0x{roc:08x}')
        return _patched_sb
# ── Combined read ─────────────────────────────────────────────────────────────
def read_virtual(virt_offset, length):
    """raw_read() plus on-the-fly superblock and GDT patching.

    Any part of the request overlapping the superblock is replaced with
    the cached patched copy; any overlap with the primary GDT gets
    synthesized descriptors via patch_gdt().
    """
    buf = bytearray(raw_read(virt_offset, length))
    end = virt_offset + length
    sb_lo = SB_VIRT_OFFSET
    sb_hi = SB_VIRT_OFFSET + SB_SIZE
    if sb_lo < end and virt_offset < sb_hi:
        patched = get_patched_sb()
        lo = max(virt_offset, sb_lo)
        hi = min(end, sb_hi)
        buf[lo - virt_offset:hi - virt_offset] = patched[lo - sb_lo:hi - sb_lo]
    if GDT_START_VIRT < end and virt_offset < GDT_END_VIRT:
        patch_gdt(buf, virt_offset, length)
    return bytes(buf)
# ── NBD newstyle protocol ────────────────────────────────────────────────────
def recv_all(conn, n):
    """Receive exactly *n* bytes from *conn*; raise ConnectionError on EOF."""
    pieces = []
    got = 0
    while got < n:
        piece = conn.recv(n - got)
        if not piece:
            raise ConnectionError('client disconnected')
        pieces.append(piece)
        got += len(piece)
    return b''.join(pieces)
def send_reply(conn, opt, reply_type, data=b''):
    """Send one structured option reply: magic, option, type, length, payload."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, reply_type, len(data)))
    if data:
        conn.sendall(data)
def send_export_info(conn, no_zeroes=False):
    """Old-style export answer: size, transmission flags, optional padding.

    The 124 zero bytes are omitted when the client negotiated NO_ZEROES.
    """
    tx = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
    conn.sendall(struct.pack('>QH', VIRT_SIZE, tx))
    if not no_zeroes:
        conn.sendall(bytes(124))
def handle_client(conn, addr):
    """Serve one NBD client: fixed-newstyle handshake, option haggling,
    then a read-only transmission loop.  Runs on its own daemon thread.

    Fix vs. original: NBD_OPT_GO must be answered with an NBD_REP_INFO
    reply carrying the NBD_INFO_EXPORT record, followed by a bare
    NBD_REP_ACK.  The original sent the info record as the ACK payload,
    which makes nbd-client connect but report an export size of 0.
    """
    print(f'[nbd] connect from {addr}')
    no_zeroes = False
    try:
        # ── Fixed newstyle handshake ──────────────────────────────────────
        # S: magic + IHAVEOPT + server flags
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        server_flags = NBD_FLAG_HAS_FLAGS | (1 << 0)  # FIXED_NEWSTYLE
        conn.sendall(struct.pack('>H', server_flags))
        # C: client flags
        client_flags = struct.unpack('>I', recv_all(conn, 4))[0]
        no_zeroes = bool(client_flags & NBD_FLAG_C_NO_ZEROES)
        # ── Option haggling ───────────────────────────────────────────────
        while True:
            opt_hdr = recv_all(conn, 16)
            cli_magic, opt, opt_len = struct.unpack('>QII', opt_hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''
            if opt == NBD_OPT_EXPORT_NAME:
                # Immediate export — no reply, go straight to transmission
                send_export_info(conn, no_zeroes)
                break
            elif opt == NBD_OPT_GO:
                # Export name (uint32 len + name + info requests) — unused
                name_len = struct.unpack('>I', opt_data[:4])[0]
                # NBD_INFO_EXPORT record: uint16 type=0, uint64 size,
                # uint16 transmission flags
                info = struct.pack('>HQH', 0, VIRT_SIZE,
                                   NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH)
                send_reply(conn, opt, 3, info)  # NBD_REP_INFO = 3
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # Advertise one anonymous export
                name = b''
                send_reply(conn, opt, NBD_REP_SERVER,
                           struct.pack('>I', len(name)) + name)
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} — entering transmission phase')
        # ── Transmission phase ────────────────────────────────────────────
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                print(f'[nbd] bad request magic 0x{magic:08x}')
                return
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                    err = 0
                except Exception as e:
                    # Best-effort recovery device: log and serve zeros
                    # rather than killing the session.
                    print(f'[nbd] read err offset={offset} len={length}: {e}')
                    payload = b'\x00' * length
                    err = 0
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, err, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnected')
                return
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: flush is a no-op, ack immediately
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
            else:
                # Write or unknown — return EPERM
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
    finally:
        conn.close()
def main():
    """Print a banner and serve NBD clients forever on 127.0.0.1:10809."""
    print('PERC H710 recovery NBD server v3 (newstyle protocol)')
    print(f' device : {DEV}')
    print(f' lv start : byte {LV_PHYS_START}')
    print(f' virtual sz : {VIRT_SIZE // (1024**3):.1f} GB')
    print(f' features : chunk-skip + sb-patch + gdt-synth + newstyle')
    print()
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    print('Listening on 127.0.0.1:10809')
    print()
    print('Connect with:')
    print(' nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""')
    print(' mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root')
    print()
    # One daemon thread per client; the main thread just accepts.
    while True:
        client, peer = listener.accept()
        worker = threading.Thread(target=handle_client, args=(client, peer),
                                  daemon=True)
        worker.start()
if __name__ == '__main__':
    main()

340
test/nbd_server_v4 Normal file
View File

@@ -0,0 +1,340 @@
#!/usr/bin/env python3
"""
NBD server v4 — fixed newstyle protocol size negotiation.
Key fix: NBD_OPT_GO info reply must send NBD_INFO_EXPORT (type 0) record
with correct format, followed by NBD_REP_ACK. Without this the client
connects but reports size=0.
"""
import socket
import struct
import threading
# ── Physical layout ───────────────────────────────────────────────────────────
DEV = '/dev/md0'                       # reassembled array device
CHUNK_BYTES = 128 * 512                # 64 KB PERC chunk
LV_PHYS_START = 5120000 * 512          # byte 2,621,440,000 — LV data start
VIRT_SIZE = 9365766144 * 512           # from superblock block count
# ── ext4 filesystem parameters ────────────────────────────────────────────────
BSIZE = 4096                           # filesystem block size
GDT_ENTRY = 64                         # descriptor size (64-bit feature)
NUM_GROUPS = 35728
GDT_START_VIRT = BSIZE                 # primary GDT begins at block 1
GDT_END_VIRT = BSIZE + NUM_GROUPS * GDT_ENTRY
SB_VIRT_OFFSET = 1024                  # primary superblock location
SB_SIZE = 1024
SB_INCOMPAT_OFF = 96                   # s_feature_incompat
SB_RO_COMPAT_OFF = 100                 # s_feature_ro_compat
SB_CHECKSUM_OFF = 1020                 # s_checksum
# NOTE(review): ext4 incompat bit 0x4 is INCOMPAT_RECOVER (journal replay
# needed); COMPAT_HAS_JOURNAL is the compat bit 0x4.  Clearing incompat 0x4
# disables replay — likely intended, but the name is misleading; confirm.
INCOMPAT_HAS_JOURNAL = 0x00000004
RO_COMPAT_METADATA_CSUM = 0x00000400
RO_COMPAT_GDT_CSUM = 0x00000010
_patched_sb = None                     # cache for the patched superblock bytes
_sb_lock = threading.Lock()            # guards one-time _patched_sb build
# ── NBD protocol ──────────────────────────────────────────────────────────────
NBDMAGIC = 0x4e42444d41474943          # "NBDMAGIC"
IHAVEOPT = 0x49484156454F5054          # "IHAVEOPT"
REPLYMAGIC = 0x3e889045565a9           # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = (1 << 31) | 1
NBD_INFO_EXPORT = 0                    # info type: export size + flags
NBD_FLAG_HAS_FLAGS = 1 << 0
NBD_FLAG_READ_ONLY = 1 << 1
NBD_FLAG_SEND_FLUSH = 1 << 2
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_WRITE = 1
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
TRANSMISSION_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
# ── Chunk translation ─────────────────────────────────────────────────────────
def raw_read(virt_offset, length):
    """PERC chunk-translated read of *length* bytes at *virt_offset*.

    Every 5th 64 KB chunk of the physical LV is controller metadata and
    is skipped; bytes mapping into a skipped chunk come back as zeros.
    """
    out = bytearray(length)
    cur, left = virt_offset, length
    with open(DEV, 'rb') as dev:
        while left > 0:
            stripe, in_stripe = divmod(cur, 5 * CHUNK_BYTES)
            idx, intra = divmod(in_stripe, CHUNK_BYTES)
            take = min(CHUNK_BYTES - intra, left)
            if idx != 4:  # idx 4 -> PERC metadata chunk, stays zero
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + idx) * CHUNK_BYTES
                         + intra)
                piece = dev.read(take)
                base = cur - virt_offset
                out[base:base + len(piece)] = piece
            cur += take
            left -= take
    return bytes(out)
# ── GDT synthesis ─────────────────────────────────────────────────────────────
def make_gdt_entry(n):
    """Synthesize the 64-byte group descriptor for damaged group *n*.

    Pattern confirmed from intact groups: block bitmap 1038+n, inode
    bitmap 1054+n, inode table 1070+512*n.  The *_hi words (offsets
    32/36/40) remain zero.  bg_flags is set to INODE_UNINIT|BLOCK_UNINIT
    (0x0003) so ext4 skips bitmap validation for these groups.
    """
    desc = bytearray(64)
    struct.pack_into('<III', desc, 0, 1038 + n, 1054 + n, 1070 + n * 512)
    struct.pack_into('<H', desc, 18, 0x0003)  # EXT4_BG_INODE_UNINIT | BLOCK_UNINIT
    return bytes(desc)
def patch_gdt(data, virt_offset, length):
    """Fill GDT bytes lost to PERC metadata chunks with synthesized entries.

    raw_read() returns zeros for every 5th 64 KB chunk; where those
    zeros overlap the primary GDT, splice in descriptors from
    make_gdt_entry().  Mutates *data* (a bytearray) in place.

    Fix vs. original: the original rebuilt the same 64-byte descriptor
    once per byte copied; this version copies per-descriptor spans and
    builds each descriptor exactly once.
    """
    pos = virt_offset
    remaining = length
    while remaining > 0:
        in_group = pos % (5 * CHUNK_BYTES)
        chunk_idx = in_group // CHUNK_BYTES
        intra = in_group % CHUNK_BYTES
        seg_len = min(CHUNK_BYTES - intra, remaining)
        seg_end = pos + seg_len
        if chunk_idx == 4:
            ol_start = max(pos, GDT_START_VIRT)
            ol_end = min(seg_end, GDT_END_VIRT)
            cur = ol_start
            while cur < ol_end:
                gdt_rel = cur - GDT_START_VIRT
                grp = gdt_rel // GDT_ENTRY
                byte_in = gdt_rel % GDT_ENTRY
                n = min(GDT_ENTRY - byte_in, ol_end - cur)
                if grp < NUM_GROUPS:
                    entry = make_gdt_entry(grp)   # built once per descriptor
                    dst = cur - virt_offset
                    data[dst:dst + n] = entry[byte_in:byte_in + n]
                cur += n
        pos += seg_len
        remaining -= seg_len
# ── Superblock patch ──────────────────────────────────────────────────────────
def get_patched_sb():
    """Return (and cache) the 1 KB primary superblock with recovery patches.

    Patches applied once under _sb_lock: clear the journal incompat bit
    and both checksum ro_compat bits, zero the superblock checksum, and
    additionally zero the checksum type byte and checksum seed so no
    csum machinery is advertised at all.
    """
    global _patched_sb
    with _sb_lock:
        if _patched_sb is not None:
            return _patched_sb
        sb = bytearray(raw_read(SB_VIRT_OFFSET, SB_SIZE))
        incompat = struct.unpack_from('<I', sb, SB_INCOMPAT_OFF)[0]
        ro_compat = struct.unpack_from('<I', sb, SB_RO_COMPAT_OFF)[0]
        incompat &= ~INCOMPAT_HAS_JOURNAL
        ro_compat &= ~(RO_COMPAT_METADATA_CSUM | RO_COMPAT_GDT_CSUM)
        struct.pack_into('<I', sb, SB_INCOMPAT_OFF, incompat)
        struct.pack_into('<I', sb, SB_RO_COMPAT_OFF, ro_compat)
        struct.pack_into('<I', sb, SB_CHECKSUM_OFF, 0)
        # Clear checksum type (s_checksum_type, offset 222, 1 byte)
        sb[222] = 0
        # Clear checksum seed (s_checksum_seed, offset 408, 4 bytes)
        struct.pack_into('<I', sb, 408, 0)
        _patched_sb = bytes(sb)
        print(f'[sb] patched incompat=0x{incompat:08x} ro_compat=0x{ro_compat:08x}')
        return _patched_sb
# ── Combined read ─────────────────────────────────────────────────────────────
def read_virtual(virt_offset, length):
    """raw_read() plus superblock replacement and GDT hole synthesis."""
    buf = bytearray(raw_read(virt_offset, length))
    end = virt_offset + length
    # Splice in the patched superblock where the request overlaps it.
    sb_hi = SB_VIRT_OFFSET + SB_SIZE
    if SB_VIRT_OFFSET < end and virt_offset < sb_hi:
        patched = get_patched_sb()
        lo = max(virt_offset, SB_VIRT_OFFSET)
        hi = min(end, sb_hi)
        buf[lo - virt_offset:hi - virt_offset] = \
            patched[lo - SB_VIRT_OFFSET:hi - SB_VIRT_OFFSET]
    # Synthesize GDT entries lost to metadata chunks.
    if GDT_START_VIRT < end and virt_offset < GDT_END_VIRT:
        patch_gdt(buf, virt_offset, length)
    return bytes(buf)
# ── NBD helpers ───────────────────────────────────────────────────────────────
def recv_all(conn, n):
    """Receive exactly *n* bytes; raise ConnectionError on premature EOF."""
    acc = bytearray()
    while len(acc) < n:
        part = conn.recv(n - len(acc))
        if not part:
            raise ConnectionError('disconnected')
        acc.extend(part)
    return bytes(acc)
def send_option_reply(conn, opt, reply_type, data=b''):
    """Send one structured option reply (magic, option, type, length, payload)."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, reply_type, len(data)))
    if data:
        conn.sendall(data)
def send_info_export(conn, opt):
    """Answer NBD_OPT_GO: NBD_INFO_EXPORT inside NBD_REP_INFO, then ACK.

    The info record (uint16 type, uint64 size, uint16 transmission
    flags) is what tells the client the export size — without it
    nbd-client connects but reports size=0.
    """
    record = struct.pack('>HQH',
                         NBD_INFO_EXPORT,
                         VIRT_SIZE,
                         TRANSMISSION_FLAGS)
    send_option_reply(conn, opt, NBD_REP_INFO, record)
    send_option_reply(conn, opt, NBD_REP_ACK)
# ── Client handler ────────────────────────────────────────────────────────────
def handle_client(conn, addr):
    """Run the full NBD session for one client connection.

    Phases: fixed-newstyle handshake, option haggling (EXPORT_NAME, GO,
    LIST, ABORT), then a read-only transmission loop.  Any protocol or
    socket error tears down only this client.
    """
    print(f'[nbd] connect from {addr}')
    try:
        # Server handshake: NBDMAGIC + IHAVEOPT + server flags
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        # Server flags: FIXED_NEWSTYLE (bit 0) + NO_ZEROES (bit 1)
        conn.sendall(struct.pack('>H', 0x0003))
        # Client flags (4 bytes) — read and discarded
        recv_all(conn, 4)
        # Option haggling
        while True:
            opt_hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', opt_hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''
            print(f'[nbd] {addr} opt={opt} len={opt_len}')
            if opt == NBD_OPT_EXPORT_NAME:
                # Old-style: send size + flags + (maybe) padding, no reply magic
                conn.sendall(struct.pack('>Q', VIRT_SIZE))
                conn.sendall(struct.pack('>H', TRANSMISSION_FLAGS))
                # NO_ZEROES flag set so skip 124-byte padding
                break
            elif opt == NBD_OPT_GO:
                # New-style: send NBD_REP_INFO then NBD_REP_ACK
                send_info_export(conn, opt)
                break
            elif opt == NBD_OPT_LIST:
                # Advertise one anonymous export
                name = b''
                send_option_reply(conn, opt, NBD_REP_SERVER,
                                  struct.pack('>I', len(name)) + name)
                send_option_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_option_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_option_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} entering transmission, size={VIRT_SIZE}')
        # Transmission phase
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                print(f'[nbd] bad magic 0x{magic:08x}')
                return
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Best-effort: log and serve zeros instead of failing
                    print(f'[nbd] read error offset={offset} len={length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: flush is a no-op, ack immediately
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnect')
                return
            else:
                # Writes and unknown commands -> error 1 (EPERM)
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback; traceback.print_exc()
    finally:
        conn.close()
def main():
    """Print a banner and serve NBD clients forever on 127.0.0.1:10809."""
    print('PERC H710 recovery NBD server v4')
    print(f' device : {DEV}')
    print(f' lv start : byte {LV_PHYS_START}')
    print(f' virt size : {VIRT_SIZE} bytes ({VIRT_SIZE//1024//1024//1024} GB)')
    print()
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    print('Listening on 127.0.0.1:10809')
    print(' nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""')
    print(' mount -t ext4 -o ro,norecovery /dev/nbd0 /mnt/root')
    print()
    # One daemon thread per client; the main thread only accepts.
    while True:
        client, peer = listener.accept()
        worker = threading.Thread(target=handle_client, args=(client, peer),
                                  daemon=True)
        worker.start()
if __name__ == '__main__':
    main()

212
test/nbd_server_v5.py Normal file
View File

@@ -0,0 +1,212 @@
#!/usr/bin/env python3
"""
NBD server v5 — minimal PERC H710 chunk translation only.
Simply reproduces what the PERC controller presented to the OS:
- Skip every 5th 64KB chunk (PERC internal metadata)
- Serve the result as a read-only block device
No superblock patching. No GDT synthesis. The filesystem was written
correctly through the PERC's translation — reading it back the same
way should give a valid filesystem.
Usage:
python3 nbd_server_v5.py &
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root
"""
import socket
import struct
import threading
DEV = '/dev/md0'                       # reassembled array device
CHUNK_BYTES = 128 * 512                # 64 KB per chunk
LV_PHYS_START = 5120000 * 512          # byte 2,621,440,000 — LV data start
VIRT_SIZE = 9365766144 * 512           # from superblock block count
# NBD newstyle protocol constants
NBDMAGIC = 0x4e42444d41474943          # "NBDMAGIC"
IHAVEOPT = 0x49484156454F5054          # "IHAVEOPT"
REPLYMAGIC = 0x3e889045565a9           # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = (1 << 31) | 1
NBD_INFO_EXPORT = 0                    # info type: export size + flags
NBD_FLAG_HAS_FLAGS = 1 << 0
NBD_FLAG_READ_ONLY = 1 << 1
NBD_FLAG_SEND_FLUSH = 1 << 2
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
TX_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
def read_virtual(virt_offset, length):
    """Read *length* bytes of the virtual (PERC-translated) address space.

    The PERC H710 laid data out in 64 KB chunks, keeping every 5th chunk
    for itself; the virtual device is the physical LV with those chunks
    removed.  Bytes that would land in a skipped chunk read as zeros —
    no superblock patching, no GDT synthesis.
    """
    out = bytearray(length)
    cur, left = virt_offset, length
    with open(DEV, 'rb') as dev:
        while left > 0:
            stripe, in_stripe = divmod(cur, 5 * CHUNK_BYTES)
            idx, intra = divmod(in_stripe, CHUNK_BYTES)
            take = min(CHUNK_BYTES - intra, left)
            if idx != 4:  # idx 4 -> PERC metadata, not user data: stays zero
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + idx) * CHUNK_BYTES
                         + intra)
                piece = dev.read(take)
                dst = cur - virt_offset
                out[dst:dst + len(piece)] = piece
            cur += take
            left -= take
    return bytes(out)
def recv_all(conn, n):
    """Receive exactly *n* bytes from *conn*; raise ConnectionError on EOF."""
    parts = []
    need = n
    while need > 0:
        piece = conn.recv(need)
        if not piece:
            raise ConnectionError('client disconnected')
        parts.append(piece)
        need -= len(piece)
    return b''.join(parts)
def send_reply(conn, opt, rtype, data=b''):
    """Emit one structured option reply: magic, option, type, length, payload."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, rtype, len(data)))
    if data:
        conn.sendall(data)
def handle_client(conn, addr):
    """Run the full NBD session for one client connection.

    Phases: fixed-newstyle handshake, option haggling (EXPORT_NAME, GO,
    LIST, ABORT), then a read-only transmission loop serving
    chunk-translated reads.  Errors tear down only this client.
    """
    print(f'[nbd] {addr} connected')
    try:
        # Handshake: NBDMAGIC + IHAVEOPT + server flags
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        conn.sendall(struct.pack('>H', 0x0003))  # FIXED_NEWSTYLE | NO_ZEROES
        recv_all(conn, 4)  # client flags — read and discarded
        # Option haggling
        while True:
            hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''
            if opt == NBD_OPT_EXPORT_NAME:
                # Old-style: bare size + flags, then straight to transmission
                conn.sendall(struct.pack('>Q', VIRT_SIZE))
                conn.sendall(struct.pack('>H', TX_FLAGS))
                break
            elif opt == NBD_OPT_GO:
                # New-style: NBD_INFO_EXPORT record in REP_INFO, then ACK
                info = struct.pack('>HQH', NBD_INFO_EXPORT, VIRT_SIZE, TX_FLAGS)
                send_reply(conn, opt, NBD_REP_INFO, info)
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # Advertise one anonymous export
                name = b''
                send_reply(conn, opt, NBD_REP_SERVER,
                           struct.pack('>I', 0) + name)
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} transmission phase ({VIRT_SIZE//1024//1024//1024}GB)')
        # Transmission
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                print(f'[nbd] bad magic 0x{magic:08x}')
                return
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Best-effort: log and serve zeros instead of failing
                    print(f'[nbd] read error @ {offset}+{length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: flush is a no-op, ack immediately
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnect')
                return
            else:
                # Writes and unknown commands -> error 1 (EPERM)
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback; traceback.print_exc()
    finally:
        conn.close()
def main():
    """Print a banner and serve NBD clients forever on 127.0.0.1:10809."""
    print(f'PERC H710 recovery NBD server v5 (minimal)')
    print(f' device : {DEV}')
    print(f' lv_start : byte {LV_PHYS_START} (sector {LV_PHYS_START//512})')
    print(f' virt_size : {VIRT_SIZE//1024//1024//1024} GB')
    print(f' chunk : {CHUNK_BYTES//1024} KB, every 5th skipped')
    print()
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    print('Listening on 127.0.0.1:10809')
    print(' nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""')
    print(' mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root')
    print()
    # One daemon thread per client; the main thread only accepts.
    while True:
        client, peer = listener.accept()
        worker = threading.Thread(target=handle_client, args=(client, peer),
                                  daemon=True)
        worker.start()
if __name__ == '__main__':
    main()

271
test/nbd_server_v6.py Normal file
View File

@@ -0,0 +1,271 @@
#!/usr/bin/env python3
"""
NBD server v6 — chunk translation + backup GDT overlay.
The primary GDT has invalid checksums (written through PERC which stored
its own checksums). The backup GDT at block group 1 has valid checksums.
We serve the backup GDT bytes at the primary GDT location so the kernel
can validate and mount the filesystem.
Reads the backup GDT once at startup and caches it.
All other reads: pure chunk translation, no modification.
Usage:
python3 nbd_server_v6.py &
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root
"""
import socket
import struct
import threading
import sys
DEV = '/dev/md0'                       # reassembled array device
CHUNK_BYTES = 128 * 512                # 64 KB PERC chunk
LV_PHYS_START = 5120000 * 512          # byte 2,621,440,000 — LV data start
VIRT_SIZE = 9365766144 * 512           # from superblock block count
BSIZE = 4096                           # filesystem block size
GDT_ENTRY = 64                         # descriptor size (64-bit feature)
BPG = 32768                            # blocks per group
NUM_GROUPS = 35728
# Primary GDT: virtual bytes 4096 to 4096+NUM_GROUPS*64
PRIMARY_GDT_START = BSIZE
PRIMARY_GDT_SIZE = NUM_GROUPS * GDT_ENTRY
PRIMARY_GDT_END = PRIMARY_GDT_START + PRIMARY_GDT_SIZE
# Backup GDT: at block group 1, block 1 = (BPG+1)*BSIZE
BACKUP_GDT_START = (BPG + 1) * BSIZE
# Cached backup GDT (loaded at startup by load_backup_gdt())
_backup_gdt = None
# NBD protocol
NBDMAGIC = 0x4e42444d41474943          # "NBDMAGIC"
IHAVEOPT = 0x49484156454F5054          # "IHAVEOPT"
REPLYMAGIC = 0x3e889045565a9           # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = (1 << 31) | 1
NBD_INFO_EXPORT = 0                    # info type: export size + flags
NBD_FLAG_HAS_FLAGS = 1 << 0
NBD_FLAG_READ_ONLY = 1 << 1
NBD_FLAG_SEND_FLUSH = 1 << 2
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
TX_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
def raw_read(virt_offset, length):
    """Pure PERC chunk translation — no superblock or GDT modification.

    Every 5th 64 KB chunk of the physical LV is controller metadata and
    is skipped; bytes mapping into a skipped chunk come back as zeros.
    """
    out = bytearray(length)
    cur, left = virt_offset, length
    with open(DEV, 'rb') as dev:
        while left > 0:
            stripe, in_stripe = divmod(cur, 5 * CHUNK_BYTES)
            idx, intra = divmod(in_stripe, CHUNK_BYTES)
            take = min(CHUNK_BYTES - intra, left)
            if idx != 4:  # idx 4 -> PERC metadata chunk, stays zero
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + idx) * CHUNK_BYTES
                         + intra)
                piece = dev.read(take)
                dst = cur - virt_offset
                out[dst:dst + len(piece)] = piece
            cur += take
            left -= take
    return bytes(out)
def load_backup_gdt():
    """Read the backup GDT (block group 1) once and cache it in _backup_gdt.

    Prints the first few descriptors as a sanity check and returns True
    when they look plausible (non-zero bitmap/table pointers), False
    otherwise.
    """
    global _backup_gdt
    print(f'[gdt] loading backup GDT from virtual byte {BACKUP_GDT_START}...')
    _backup_gdt = raw_read(BACKUP_GDT_START, PRIMARY_GDT_SIZE)
    looks_ok = True
    for idx in range(min(5, NUM_GROUPS)):
        desc = _backup_gdt[idx * GDT_ENTRY:(idx + 1) * GDT_ENTRY]
        bb, ib, it = struct.unpack_from('<III', desc, 0)
        csum = struct.unpack_from('<H', desc, 30)[0]
        print(f'[gdt] group {idx}: bb={bb} ib={ib} it={it} csum=0x{csum:04x}')
        if bb == 0 and ib == 0 and it == 0:
            looks_ok = False
    if looks_ok:
        print(f'[gdt] backup GDT loaded OK ({PRIMARY_GDT_SIZE//1024}KB)')
    else:
        print('[gdt] WARNING: backup GDT looks empty, check parameters')
    return looks_ok
def read_virtual(virt_offset, length):
    """Chunk-translated read with the backup GDT overlaid.

    Bytes inside the primary GDT region (virtual bytes PRIMARY_GDT_START
    to PRIMARY_GDT_END) are served from the cached backup GDT; everything
    else is a plain raw_read().
    """
    end = virt_offset + length
    # Fast path: request does not touch the primary GDT at all.
    if end <= PRIMARY_GDT_START or virt_offset >= PRIMARY_GDT_END:
        return raw_read(virt_offset, length)
    buf = bytearray(raw_read(virt_offset, length))
    lo = max(virt_offset, PRIMARY_GDT_START)
    hi = min(end, PRIMARY_GDT_END)
    if _backup_gdt is not None and lo < hi:
        span = hi - lo
        at = lo - virt_offset
        src = lo - PRIMARY_GDT_START
        buf[at:at + span] = _backup_gdt[src:src + span]
    return bytes(buf)
def recv_all(conn, n):
    """Receive exactly n bytes from conn; raise ConnectionError on EOF."""
    parts = []
    got = 0
    while got < n:
        piece = conn.recv(n - got)
        if not piece:
            raise ConnectionError('disconnected')
        parts.append(piece)
        got += len(piece)
    return b''.join(parts)
def send_reply(conn, opt, rtype, data=b''):
    """Send one option-haggling reply: magic, echoed option, reply type,
    payload length, then the optional payload (same wire bytes as before)."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, rtype, len(data)))
    if data:
        conn.sendall(data)
def handle_client(conn, addr):
    """Serve one NBD client: fixed-newstyle negotiation, then a read-only
    transmission phase backed by read_virtual().  Runs in its own thread."""
    print(f'[nbd] {addr} connected')
    try:
        # Fixed-newstyle greeting; 0x0003 = FIXED_NEWSTYLE | NO_ZEROES.
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        conn.sendall(struct.pack('>H', 0x0003))
        recv_all(conn, 4)  # client flags (ignored)
        # Option haggling: loop until the client selects the export.
        while True:
            hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''  # drain payload
            if opt == NBD_OPT_EXPORT_NAME:
                # Legacy selection: bare size + flags, then straight to transmission.
                conn.sendall(struct.pack('>Q', VIRT_SIZE))
                conn.sendall(struct.pack('>H', TX_FLAGS))
                break
            elif opt == NBD_OPT_GO:
                # Modern selection: one INFO_EXPORT block, then ACK.
                info = struct.pack('>HQH', NBD_INFO_EXPORT, VIRT_SIZE, TX_FLAGS)
                send_reply(conn, opt, NBD_REP_INFO, info)
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # Single export with a zero-length name.
                send_reply(conn, opt, NBD_REP_SERVER,
                           struct.pack('>I', 0))
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} transmission ({VIRT_SIZE//1024//1024//1024}GB)')
        # Transmission phase: simple (non-structured) replies only.
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                return  # protocol desync — drop the connection
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Never stall the client: reply with zeros on backend errors.
                    print(f'[nbd] read error @ {offset}+{length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: nothing to flush, just acknowledge.
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                return
            else:
                # Error status 1 for writes and anything unsupported.
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        pass  # client went away — normal teardown
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback; traceback.print_exc()
    finally:
        conn.close()
def main():
    """Entry point: report configuration, load the backup GDT, then accept
    NBD clients forever on 127.0.0.1:10809 (one daemon thread per client)."""
    for line in (
        'PERC H710 recovery NBD server v6',
        f'  device     : {DEV}',
        f'  lv_start   : byte {LV_PHYS_START}',
        f'  virt_size  : {VIRT_SIZE//1024//1024//1024} GB',
        f'  primary GDT: virtual bytes {PRIMARY_GDT_START}-{PRIMARY_GDT_END}',
        f'  backup GDT : virtual byte {BACKUP_GDT_START}',
        '',
    ):
        print(line)
    # Without the backup GDT there is nothing sensible to serve.
    if not load_backup_gdt():
        print('ERROR: backup GDT load failed, check BACKUP_GDT_START')
        sys.exit(1)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    for line in (
        '',
        'Listening on 127.0.0.1:10809',
        '  nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""',
        '  mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root',
        '',
    ):
        print(line)
    while True:
        client, peer = listener.accept()
        threading.Thread(target=handle_client, args=(client, peer),
                         daemon=True).start()
if __name__ == '__main__':
    main()

263
test/nbd_server_v8.py Normal file
View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python3
"""
NBD server v8 - PERC H710 chunk translation + on-the-fly patches:
1. Superblock: clear metadata_csum, gdt_csum, has_journal feature bits
2. GDT: zero all checksum fields in every group descriptor
Nothing is written to disk. All patches applied in memory on reads.
"""
import socket, struct, threading
DEV = '/dev/md0'
CHUNK_BYTES = 128 * 512              # PERC stripe chunk: 64 KB
LV_PHYS_START = 5120000 * 512        # LV data start on DEV, in bytes
VIRT_SIZE = 9365766144 * 512         # exported size (superblock block count)
# Superblock location and fields (offsets within the 1024-byte superblock)
SB_OFFSET = 1024
SB_SIZE = 1024
SB_COMPAT = 92                       # s_feature_compat
SB_INCOMPAT = 96                     # s_feature_incompat
SB_RO_COMPAT = 100                   # s_feature_ro_compat
# FIX: s_journal_inum lives at offset 0xE0 = 224 in the ext4 superblock.
# The previous value (180) pointed into the middle of s_last_mounted, so
# the journal inode number was never actually cleared.
SB_JNLINUM = 224
SB_CHECKSUM = 1020                   # s_checksum (last 4 bytes)
# NOTE(review): in s_feature_incompat, bit 0x004 is
# EXT4_FEATURE_INCOMPAT_RECOVER ("journal needs replay"), not HAS_JOURNAL.
# Clearing it is what lets the filesystem mount without journal recovery,
# so the value is kept — only the constant's name is loose.
INCOMPAT_HAS_JOURNAL = 0x004
COMPAT_HAS_JOURNAL = 0x004           # EXT4_FEATURE_COMPAT_HAS_JOURNAL
RO_COMPAT_GDT_CSUM = 0x010
RO_COMPAT_METADATA_CSUM = 0x400
# GDT location and fields
GDT_OFFSET = 4096                    # block 1
GDT_ENTRY_SZ = 64                    # 64-byte descriptors (64bit feature)
NUM_GROUPS = 35728
GDT_SIZE = NUM_GROUPS * GDT_ENTRY_SZ
GDT_END = GDT_OFFSET + GDT_SIZE
GDT_CSUM_OFF = 30                    # bg_checksum offset within each entry
# NBD protocol (fixed-newstyle handshake)
NBDMAGIC = 0x4e42444d41474943        # b'NBDMAGIC'
IHAVEOPT = 0x49484156454F5054        # b'IHAVEOPT'
REPLYMAGIC = 0x3e889045565a9         # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = (1 << 31) | 1    # error bit | unsupported option
NBD_INFO_EXPORT = 0
NBD_FLAG_HAS_FLAGS = 1 << 0
NBD_FLAG_READ_ONLY = 1 << 1
NBD_FLAG_SEND_FLUSH = 1 << 2
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
# Read-only export that accepts (no-op) FLUSH.
TX_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
def raw_read(virt_offset, length):
    """Chunk-translated read with no patching applied.

    Maps the virtual offset through the PERC stripe layout (every 5th 64KB
    chunk is controller metadata and reads back as zeros) and returns a
    mutable bytearray so the patch_* helpers can edit it in place.
    """
    out = bytearray(length)
    done = 0
    with open(DEV, 'rb') as dev:
        while done < length:
            virt = virt_offset + done
            stripe, within = divmod(virt, 5 * CHUNK_BYTES)
            slot, intra = divmod(within, CHUNK_BYTES)
            step = min(CHUNK_BYTES - intra, length - done)
            if slot != 4:  # slot 4 = PERC metadata chunk, left zeroed
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + slot) * CHUNK_BYTES
                         + intra)
                piece = dev.read(step)
                out[done:done + len(piece)] = piece
            done += step
    return out
def patch_superblock(data, req_start):
    """Patch superblock feature bits inside an in-memory read buffer.

    data      : mutable buffer whose first byte maps to virtual offset
                req_start.
    Clears the journal/checksum feature bits and zeroes the journal-inode
    and superblock-checksum fields.  Fields that are not fully contained
    in the buffer are left untouched.
    """
    lo = SB_OFFSET
    hi = SB_OFFSET + SB_SIZE
    if req_start >= hi or req_start + len(data) <= lo:
        return  # buffer does not touch the superblock
    # (field offset within the sb, mask of bits to clear; None => zero field)
    edits = (
        (SB_COMPAT, COMPAT_HAS_JOURNAL),
        (SB_INCOMPAT, INCOMPAT_HAS_JOURNAL),
        (SB_RO_COMPAT, RO_COMPAT_GDT_CSUM | RO_COMPAT_METADATA_CSUM),
        (SB_JNLINUM, None),
        (SB_CHECKSUM, None),
    )
    for field, mask in edits:
        at = lo + field - req_start
        if not (0 <= at <= len(data) - 4):
            continue  # field not fully inside this buffer
        if mask is None:
            struct.pack_into('<I', data, at, 0)
        else:
            cur = struct.unpack_from('<I', data, at)[0]
            struct.pack_into('<I', data, at, cur & ~mask)
def patch_gdt(data, req_start):
    """Zero the bg_checksum field (offset 30) of every group descriptor
    whose checksum lands fully inside this read buffer.

    req_start is the virtual offset of data[0].
    """
    req_end = req_start + len(data)
    if req_start >= GDT_END or req_end <= GDT_OFFSET:
        return  # buffer does not touch the GDT
    # Descriptor index range that could intersect the buffer.
    lo_grp = max(0, (req_start - GDT_OFFSET) // GDT_ENTRY_SZ)
    hi_grp = min(NUM_GROUPS - 1, (req_end - GDT_OFFSET - 1) // GDT_ENTRY_SZ)
    for grp in range(lo_grp, hi_grp + 1):
        at = GDT_OFFSET + grp * GDT_ENTRY_SZ + GDT_CSUM_OFF - req_start
        if 0 <= at <= len(data) - 2:
            struct.pack_into('<H', data, at, 0)
def read_virtual(virt_offset, length):
    """Chunk-translated read with superblock + GDT patches applied in memory."""
    buf = raw_read(virt_offset, length)
    for fix in (patch_superblock, patch_gdt):
        fix(buf, virt_offset)
    return bytes(buf)
def recv_all(conn, n):
    """Receive exactly n bytes from conn; raise ConnectionError on EOF."""
    parts = []
    got = 0
    while got < n:
        piece = conn.recv(n - got)
        if not piece:
            raise ConnectionError('disconnected')
        parts.append(piece)
        got += len(piece)
    return b''.join(parts)
def send_reply(conn, opt, rtype, data=b''):
    """Send one option-haggling reply (magic, option, type, length, payload)."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, rtype, len(data)))
    if data:
        conn.sendall(data)
def handle_client(conn, addr):
    """Serve one NBD client: fixed-newstyle negotiation, then a read-only
    transmission phase whose reads go through read_virtual()."""
    print(f'[nbd] {addr} connected')
    try:
        # Fixed-newstyle greeting; 0x0003 = FIXED_NEWSTYLE | NO_ZEROES.
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        conn.sendall(struct.pack('>H', 0x0003))
        recv_all(conn, 4)  # client flags (ignored)
        # Option haggling until the client picks the export.
        while True:
            hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''  # drain payload
            if opt == NBD_OPT_EXPORT_NAME:
                # Legacy selection: bare size + flags, then transmission.
                conn.sendall(struct.pack('>Q', VIRT_SIZE))
                conn.sendall(struct.pack('>H', TX_FLAGS))
                break
            elif opt == NBD_OPT_GO:
                # Modern selection: one INFO_EXPORT block, then ACK.
                info = struct.pack('>HQH', NBD_INFO_EXPORT,
                                   VIRT_SIZE, TX_FLAGS)
                send_reply(conn, opt, NBD_REP_INFO, info)
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # Single export with a zero-length name.
                send_reply(conn, opt, NBD_REP_SERVER,
                           struct.pack('>I', 0))
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} in transmission')
        # Transmission phase: simple replies; writes get error status 1.
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                return  # protocol desync — drop the connection
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Keep the client alive: return zeros on backend errors.
                    print(f'[nbd] read error {offset}+{length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: nothing to flush, just acknowledge.
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnected')
                return
            else:
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback; traceback.print_exc()
    finally:
        conn.close()
def main():
    """Report configuration, then serve patched reads on 127.0.0.1:10809."""
    for line in (
        'PERC H710 recovery NBD server v8',
        f'  device     : {DEV}',
        f'  lv_start   : byte {LV_PHYS_START}',
        f'  virt_size  : {VIRT_SIZE // 1024**3} GB',
        f'  patches    : superblock features + GDT checksums (on-the-fly)',
        '',
    ):
        print(line)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    for line in (
        'Listening on 127.0.0.1:10809',
        '  nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""',
        '  fls /dev/nbd0 1585918',
        '  mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root',
        '',
    ):
        print(line)
    while True:
        client, peer = listener.accept()
        threading.Thread(target=handle_client, args=(client, peer),
                         daemon=True).start()
if __name__ == '__main__':
    main()

247
test/nbd_server_v9.py Normal file
View File

@@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""
NBD server v9 - PERC H710 chunk translation + merged GDT overlay only.
No feature flag patching. No checksum zeroing. No journal disabling.
Just serves the filesystem exactly as it was written, with a complete
GDT built by merging primary (for non-metadata-chunk entries) and
backup group 1 (for entries that fall in metadata chunks).
Primary and backup GDT bad entries are completely disjoint (0 overlap),
so this gives 100% GDT coverage with authentic data and valid checksums.
Usage:
# First build the merged GDT:
# python3 build_merged_gdt.py (saves /tmp/merged_gdt.bin)
python3 nbd_server_v9.py &
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
ext4magic /dev/nbd0 -s 4096 -n 32768 -R -I 1585918 -a $(date -d "2023-01-01" +%s) -d /mnt/recovered
"""
import socket, struct, threading, sys, os
DEV = '/dev/md0'                     # reconstructed RAID device
MERGED_GDT = '/tmp/merged_gdt.bin'   # output of the GDT builder script
CHUNK_BYTES = 128 * 512              # PERC stripe chunk: 64 KB
LV_PHYS_START = 5120000 * 512        # LV data start on DEV, in bytes
VIRT_SIZE = 9365766144 * 512         # exported size (superblock block count)
# GDT location in virtual address space
BSIZE = 4096                         # filesystem block size
GDT_ENTRY_SZ = 64                    # 64-byte descriptors (64bit feature)
NUM_GROUPS = 35728
GDT_VIRT_START = BSIZE               # block 1 = byte 4096
GDT_VIRT_END = BSIZE + NUM_GROUPS * GDT_ENTRY_SZ
# NBD protocol (fixed-newstyle handshake + simple transmission replies)
NBDMAGIC = 0x4e42444d41474943        # b'NBDMAGIC'
IHAVEOPT = 0x49484156454F5054        # b'IHAVEOPT'
REPLYMAGIC = 0x3e889045565a9         # option-reply magic
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = (1 << 31) | 1    # error bit | unsupported option
NBD_INFO_EXPORT = 0
NBD_FLAG_HAS_FLAGS = 1 << 0
NBD_FLAG_READ_ONLY = 1 << 1
NBD_FLAG_SEND_FLUSH = 1 << 2
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3
# Read-only export that accepts (no-op) FLUSH.
TX_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
# Load merged GDT at startup (import-time side effect: fail fast so the
# server never starts without a complete, correctly-sized GDT overlay).
print(f'Loading merged GDT from {MERGED_GDT}...')
if not os.path.exists(MERGED_GDT):
    print(f'ERROR: {MERGED_GDT} not found.')
    print('Run the GDT builder script first.')
    sys.exit(1)
with open(MERGED_GDT, 'rb') as f:
    MERGED_GDT_DATA = f.read()
# The file must hold exactly one 64-byte descriptor per block group.
expected = NUM_GROUPS * GDT_ENTRY_SZ
if len(MERGED_GDT_DATA) != expected:
    print(f'ERROR: merged GDT is {len(MERGED_GDT_DATA)} bytes, '
          f'expected {expected}')
    sys.exit(1)
print(f'Merged GDT loaded: {len(MERGED_GDT_DATA)//1024}KB '
      f'({NUM_GROUPS} groups)')
def raw_read(virt_offset, length):
    """Pure PERC chunk translation with no modifications.

    Every 5th 64KB chunk is controller metadata and reads back as zeros.
    Returns a mutable bytearray so read_virtual() can overlay the GDT.
    """
    out = bytearray(length)
    done = 0
    with open(DEV, 'rb') as dev:
        while done < length:
            virt = virt_offset + done
            stripe, within = divmod(virt, 5 * CHUNK_BYTES)
            slot, intra = divmod(within, CHUNK_BYTES)
            step = min(CHUNK_BYTES - intra, length - done)
            if slot != 4:  # slot 4 = PERC metadata chunk, left zeroed
                dev.seek(LV_PHYS_START
                         + (stripe * 4 + slot) * CHUNK_BYTES
                         + intra)
                piece = dev.read(step)
                out[done:done + len(piece)] = piece
            done += step
    return out
def read_virtual(virt_offset, length):
    """Read virtual bytes with only the merged-GDT overlay applied.

    The primary GDT region (bytes GDT_VIRT_START..GDT_VIRT_END) is served
    from the pre-built merged GDT; every other byte is untouched chunk
    translation.
    """
    buf = raw_read(virt_offset, length)
    end = virt_offset + length
    if virt_offset < GDT_VIRT_END and end > GDT_VIRT_START:
        lo = max(virt_offset, GDT_VIRT_START)
        hi = min(end, GDT_VIRT_END)
        span = hi - lo
        at = lo - virt_offset
        src = lo - GDT_VIRT_START
        buf[at:at + span] = MERGED_GDT_DATA[src:src + span]
    return bytes(buf)
def recv_all(conn, n):
    """Receive exactly n bytes from conn; raise ConnectionError on EOF."""
    parts = []
    got = 0
    while got < n:
        piece = conn.recv(n - got)
        if not piece:
            raise ConnectionError('disconnected')
        parts.append(piece)
        got += len(piece)
    return b''.join(parts)
def send_reply(conn, opt, rtype, data=b''):
    """Send one option-haggling reply (magic, option, type, length, payload)."""
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, rtype, len(data)))
    if data:
        conn.sendall(data)
def handle_client(conn, addr):
    """Serve one NBD client: fixed-newstyle negotiation, then a read-only
    transmission phase whose reads go through read_virtual()."""
    print(f'[nbd] {addr} connected')
    try:
        # Fixed-newstyle greeting; 0x0003 = FIXED_NEWSTYLE | NO_ZEROES.
        conn.sendall(struct.pack('>Q', NBDMAGIC))
        conn.sendall(struct.pack('>Q', IHAVEOPT))
        conn.sendall(struct.pack('>H', 0x0003))
        recv_all(conn, 4)  # client flags (ignored)
        # Option haggling until the client picks the export.
        while True:
            hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', hdr)
            opt_data = recv_all(conn, opt_len) if opt_len else b''  # drain payload
            if opt == NBD_OPT_EXPORT_NAME:
                # Legacy selection: bare size + flags, then transmission.
                conn.sendall(struct.pack('>Q', VIRT_SIZE))
                conn.sendall(struct.pack('>H', TX_FLAGS))
                break
            elif opt == NBD_OPT_GO:
                # Modern selection: one INFO_EXPORT block, then ACK.
                info = struct.pack('>HQH', NBD_INFO_EXPORT,
                                   VIRT_SIZE, TX_FLAGS)
                send_reply(conn, opt, NBD_REP_INFO, info)
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # Single export with a zero-length name.
                send_reply(conn, opt, NBD_REP_SERVER,
                           struct.pack('>I', 0))
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)
        print(f'[nbd] {addr} transmission')
        # Transmission phase: simple replies; writes get error status 1.
        while True:
            hdr = recv_all(conn, 28)
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                return  # protocol desync — drop the connection
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Keep the client alive: return zeros on backend errors.
                    print(f'[nbd] read error {offset}+{length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                # Read-only export: nothing to flush, just acknowledge.
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnected')
                return
            else:
                conn.sendall(struct.pack('>IIQ',
                    NBD_REPLY_MAGIC, 1, handle))
    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback; traceback.print_exc()
    finally:
        conn.close()
def main():
    """Report configuration and serve the merged-GDT overlay on 127.0.0.1:10809."""
    for line in (
        'PERC H710 recovery NBD server v9',
        f'  device     : {DEV}',
        f'  lv_start   : byte {LV_PHYS_START}',
        f'  virt_size  : {VIRT_SIZE//1024**3} GB',
        f'  GDT region : bytes {GDT_VIRT_START}-{GDT_VIRT_END}',
        f'  patch      : merged GDT only (primary + backup group 1)',
        f'  no patches : superblock, features, checksums all authentic',
        '',
    ):
        print(line)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    for line in (
        'Listening on 127.0.0.1:10809',
        '  nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""',
        '',
        'Then try:',
        '  e2fsck -n /dev/nbd0',
        '  fls /dev/nbd0 1585918',
        '  ext4magic /dev/nbd0 -s 4096 -n 32768 -R -I 1585918 \\',
        '      -a $(date -d "2023-01-01" +%s) -d /mnt/recovered',
        '',
    ):
        print(line)
    while True:
        client, peer = listener.accept()
        threading.Thread(target=handle_client, args=(client, peer),
                         daemon=True).start()
if __name__ == '__main__':
    main()

351
test/nbd_v2.py Normal file
View File

@@ -0,0 +1,351 @@
#!/usr/bin/env python3
"""
NBD server for PERC H710 RAID recovery.
Applies three transformations on every read:
1. Chunk translation — skips every 5th 64KB chunk (PERC internal metadata)
2. Superblock patch — clears metadata_csum and has_journal feature bits
so the kernel stops validating checksums we can't fix
3. GDT reconstruction — synthesizes correct group descriptor entries for
regions that fall inside metadata chunks (zeros)
Confirmed filesystem parameters (from session forensics):
bpg = 32768 blocks per group
ipg = 8192 inodes per group
inode_size = 256 bytes
GDT entry = 64 bytes (64bit feature)
num_groups = 35728
Group N: bb = 1038+N, ib = 1054+N, it = 1070 + N*512
Usage:
python3 nbd_server_v2.py &
nbd-client -g 127.0.0.1 10809 /dev/nbd0
mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root
"""
import socket
import struct
import threading
import os
import sys
# ── Physical layout ───────────────────────────────────────────────────────────
DEV = '/dev/md0'
CHUNK_BYTES = 128 * 512        # 64 KB
LV_PHYS_START = 5120000 * 512  # byte 2,621,440,000
VIRT_SIZE = 9365766144 * 512   # from superblock block count
# ── ext4 filesystem parameters ────────────────────────────────────────────────
BSIZE = 4096          # block size in bytes
BPG = 32768           # blocks per group
IPG = 8192            # inodes per group
INODE_SIZE = 256      # inode size in bytes
GDT_ENTRY = 64        # group descriptor size (64-bit mode)
NUM_GROUPS = 35728    # total number of block groups
# GDT starts at block 1 = byte 4096 from LV start
GDT_START_VIRT = BSIZE                         # virtual byte offset of GDT
GDT_END_VIRT = BSIZE + NUM_GROUPS * GDT_ENTRY  # ~9.1 MB
# Superblock is at virtual byte 1024
SB_VIRT_OFFSET = 1024
SB_SIZE = 1024
# Feature flag offsets within superblock
SB_FEAT_COMPAT_OFF = 92       # s_feature_compat
SB_FEAT_INCOMPAT_OFF = 96     # s_feature_incompat
SB_FEAT_RO_COMPAT_OFF = 100   # s_feature_ro_compat
SB_CHECKSUM_OFF = 1020        # s_checksum (last 4 bytes of sb)
# Bits to clear on the in-memory superblock copy.
# NOTE(review): in s_feature_incompat, 0x004 is EXT4_FEATURE_INCOMPAT_RECOVER
# ("journal needs replay") — clearing it skips journal recovery, which matches
# the tool's intent even though the constant is named HAS_JOURNAL.
INCOMPAT_HAS_JOURNAL = 0x00000004
RO_COMPAT_METADATA_CSUM = 0x00000400
RO_COMPAT_GDT_CSUM = 0x00000010
# Cached patched superblock (built once on first read)
_patched_sb = None
_sb_lock = threading.Lock()
# ── NBD protocol constants (oldstyle handshake) ───────────────────────────────
NBD_MAGIC = 0x4e42444d41474943        # b'NBDMAGIC'
NBD_CLISERV_MAGIC = 0x00420281861253  # oldstyle cliserv magic
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698
NBD_CMD_READ = 0
NBD_CMD_WRITE = 1
NBD_CMD_DISC = 2
NBD_FLAG_READ_ONLY = 0x0002
NBD_FLAG_HAS_FLAGS = 0x0001
# ── Chunk translation ─────────────────────────────────────────────────────────
def virt_to_phys_segments(virt_offset, length):
    """
    Generate (physical_offset, segment_length) pairs covering the virtual
    range.  physical_offset is None for segments that fall inside a PERC
    metadata chunk, meaning "no backing data — treat as zeros".
    """
    pos = virt_offset
    end = virt_offset + length
    while pos < end:
        stripe, within = divmod(pos, 5 * CHUNK_BYTES)
        slot, intra = divmod(within, CHUNK_BYTES)
        seg = min(CHUNK_BYTES - intra, end - pos)
        if slot == 4:
            yield None, seg  # PERC-internal metadata chunk
        else:
            yield (LV_PHYS_START
                   + (stripe * 4 + slot) * CHUNK_BYTES
                   + intra), seg
        pos += seg
def raw_read(virt_offset, length):
    """Read virtual bytes through the chunk map; metadata gaps stay zero."""
    out = bytearray(length)
    cursor = 0
    with open(DEV, 'rb') as dev:
        for phys, seg_len in virt_to_phys_segments(virt_offset, length):
            if phys is not None:
                dev.seek(phys)
                chunk = dev.read(seg_len)
                out[cursor:cursor + len(chunk)] = chunk
            cursor += seg_len  # gaps advance the cursor but stay zeroed
    return bytes(out)
# ── Group descriptor synthesis ────────────────────────────────────────────────
def make_gdt_entry(group_num):
    """
    Synthesize a 64-byte ext4 group descriptor for group N using the
    arithmetic layout recovered from session forensics:
        block_bitmap = 1038 + N
        inode_bitmap = 1054 + N
        inode_table  = 1070 + N * 512
    Free counts, flags and checksum are left zero (metadata_csum was
    cleared in the patched superblock, so a zero checksum is acceptable).
    """
    entry = bytearray(GDT_ENTRY)
    struct.pack_into(
        '<III', entry, 0,
        1038 + group_num,        # bg_block_bitmap_lo
        1054 + group_num,        # bg_inode_bitmap_lo
        1070 + group_num * 512,  # bg_inode_table_lo
    )
    # Counts/flags/checksum remain zero: a fresh bytearray is zero-filled,
    # which matches the original's explicit zero writes byte for byte.
    return bytes(entry)
def synthesize_gdt_region(virt_offset, length):
    """
    Return synthesized GDT bytes for the virtual range
    [virt_offset, virt_offset + length), which overlaps the GDT area.
    Bytes outside the GDT region are returned as zeros.

    Fix: the previous implementation rebuilt the entire 64-byte descriptor
    once per requested BYTE (65536 make_gdt_entry calls for a single 64KB
    chunk); this version generates each overlapped descriptor exactly once.
    """
    result = bytearray(length)
    req_end = virt_offset + length
    # Clamp to the GDT region; anything outside stays zero.
    lo = max(virt_offset, GDT_START_VIRT)
    hi = min(req_end, GDT_END_VIRT)
    pos = lo
    while pos < hi:
        rel = pos - GDT_START_VIRT
        grp, byte_in = divmod(rel, GDT_ENTRY)
        if grp >= NUM_GROUPS:
            break  # past the last group (defensive; hi already excludes this)
        take = min(GDT_ENTRY - byte_in, hi - pos)
        entry = make_gdt_entry(grp)
        dst = pos - virt_offset
        result[dst:dst + take] = entry[byte_in:byte_in + take]
        pos += take
    return bytes(result)
def overlaps(a_start, a_len, b_start, b_len):
    """True when half-open ranges [a_start, a_start+a_len) and
    [b_start, b_start+b_len) intersect."""
    a_end = a_start + a_len
    b_end = b_start + b_len
    return b_start < a_end and a_start < b_end
# ── Superblock patching ───────────────────────────────────────────────────────
def get_patched_superblock():
    """Return the cached superblock with journal/checksum features cleared.

    The first call reads the real superblock via raw_read(), clears
    has_journal / metadata_csum / gdt_csum, zeroes s_checksum and caches
    the patched copy; later calls return the cache.  Thread-safe.
    """
    global _patched_sb
    with _sb_lock:
        if _patched_sb is None:
            raw = bytearray(raw_read(SB_VIRT_OFFSET, SB_SIZE))
            incompat = struct.unpack_from('<I', raw, SB_FEAT_INCOMPAT_OFF)[0]
            ro_compat = struct.unpack_from('<I', raw, SB_FEAT_RO_COMPAT_OFF)[0]
            incompat &= ~INCOMPAT_HAS_JOURNAL
            ro_compat &= ~(RO_COMPAT_METADATA_CSUM | RO_COMPAT_GDT_CSUM)
            struct.pack_into('<I', raw, SB_FEAT_INCOMPAT_OFF, incompat)
            struct.pack_into('<I', raw, SB_FEAT_RO_COMPAT_OFF, ro_compat)
            # The stored checksum no longer matches the edited superblock.
            struct.pack_into('<I', raw, SB_CHECKSUM_OFF, 0)
            _patched_sb = bytes(raw)
            print(f'[sb] patched incompat=0x{incompat:08x} ro_compat=0x{ro_compat:08x}')
        return _patched_sb
# ── Main read function ────────────────────────────────────────────────────────
def read_virtual(virt_offset, length):
    """
    Read `length` bytes from virtual offset `virt_offset`, applying:
      1. Chunk translation (PERC metadata chunks read back as zeros)
      2. Superblock patching (feature bits cleared, checksum zeroed)
      3. GDT synthesis for GDT bytes that fell inside metadata chunks
    Returns exactly `length` bytes.

    Cleanup: the unused locals `group` and `seg_end` from the original
    chunk-walk were removed; the logic is otherwise unchanged.
    """
    # Start with raw translated data.
    data = bytearray(raw_read(virt_offset, length))
    req_end = virt_offset + length
    # ── Patch 1: overlay the cached, patched superblock ──────────────────────
    sb_start = SB_VIRT_OFFSET
    sb_end = SB_VIRT_OFFSET + SB_SIZE
    if overlaps(virt_offset, length, sb_start, SB_SIZE):
        patched = get_patched_superblock()
        copy_start = max(virt_offset, sb_start)
        copy_end = min(req_end, sb_end)
        src_off = copy_start - sb_start
        dst_off = copy_start - virt_offset
        n = copy_end - copy_start
        data[dst_off:dst_off + n] = patched[src_off:src_off + n]
    # ── Patch 2: GDT reconstruction ──────────────────────────────────────────
    # Only needed where the request overlaps the GDT AND falls in a metadata
    # chunk (those bytes came back as zeros from raw_read).
    if overlaps(virt_offset, length, GDT_START_VIRT, GDT_END_VIRT - GDT_START_VIRT):
        pos = virt_offset
        remaining = length
        while remaining > 0:
            in_group = pos % (5 * CHUNK_BYTES)
            chunk_idx = in_group // CHUNK_BYTES
            intra = in_group % CHUNK_BYTES
            seg_len = min(CHUNK_BYTES - intra, remaining)
            # chunk_idx 4 is a PERC metadata chunk: synthesize any GDT bytes.
            if chunk_idx == 4 and overlaps(pos, seg_len, GDT_START_VIRT,
                                           GDT_END_VIRT - GDT_START_VIRT):
                synth = synthesize_gdt_region(pos, seg_len)
                dst_off = pos - virt_offset
                data[dst_off:dst_off + seg_len] = synth
            pos += seg_len
            remaining -= seg_len
    return bytes(data)
# ── NBD protocol (old-style handshake) ───────────────────────────────────────
def handle_client(conn, addr):
    """Serve one NBD client using the legacy ("oldstyle") handshake, then
    answer reads via read_virtual(); writes and unknown commands error out."""
    print(f'[nbd] client connected from {addr}')
    try:
        # Old-style handshake: magic + cliserv_magic + size + flags + 124 pad
        conn.sendall(b'NBDMAGIC')
        conn.sendall(struct.pack('>Q', NBD_CLISERV_MAGIC))
        conn.sendall(struct.pack('>Q', VIRT_SIZE))
        conn.sendall(struct.pack('>H', NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY))
        conn.sendall(b'\x00' * 124)
        print(f'[nbd] handshake done, serving {VIRT_SIZE // (1024**3):.1f} GB')
        while True:
            # Read one full 28-byte request header, tolerating short recv()s.
            hdr = b''
            while len(hdr) < 28:
                chunk = conn.recv(28 - len(hdr))
                if not chunk:
                    return  # EOF mid-header: client went away
                hdr += chunk
            magic, flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                print(f'[nbd] bad magic 0x{magic:08x}, closing')
                return
            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # Reply with zeros rather than stalling the kernel client.
                    print(f'[nbd] read error offset={offset} len={length}: {e}')
                    payload = b'\x00' * length
                reply = struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle)
                conn.sendall(reply + payload)
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] client disconnected cleanly')
                return
            else:
                # Return error for writes and other commands
                reply = struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle)
                conn.sendall(reply)
    except (ConnectionResetError, BrokenPipeError):
        print(f'[nbd] client {addr} dropped connection')
    except Exception as e:
        print(f'[nbd] error: {e}')
    finally:
        conn.close()
def main():
    """Announce configuration and accept NBD clients forever on 127.0.0.1:10809."""
    for line in (
        f'[nbd] PERC H710 recovery NBD server v2',
        f'[nbd] device: {DEV}',
        f'[nbd] lv_start: byte {LV_PHYS_START} (sector {LV_PHYS_START//512})',
        f'[nbd] virt_size: {VIRT_SIZE // (1024**3):.1f} GB',
        f'[nbd] features: chunk-skip + sb-patch + gdt-synth',
        '',
    ):
        print(line)
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 10809))
    listener.listen(5)
    for line in (
        f'[nbd] listening on 127.0.0.1:10809',
        f'[nbd] connect with: nbd-client -g 127.0.0.1 10809 /dev/nbd0',
        f'[nbd] then mount: mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root',
        '',
    ):
        print(line)
    while True:
        client, peer = listener.accept()
        threading.Thread(target=handle_client, args=(client, peer),
                         daemon=True).start()
if __name__ == '__main__':
    main()

49802
test/orphaned.txt Normal file

File diff suppressed because it is too large Load Diff

12
test/orphaned_detail.txt Normal file
View File

@@ -0,0 +1,12 @@
Inode 1585918: mode=0x41c1 size=4096 links=8
Directory entries (10):
[INTACT] dir inode= 1585918 group= 193 '.'
[INTACT] dir inode= 1585910 group= 193 '..'
[INTACT] dir inode= 2786054 group= 340 '157a01c7efb651826b6cc4631f37eb1d0d8b0f32a5dd033cc15466545f310218'
[INTACT] dir inode= 11697136 group= 1427 '8eae55164cbf9ed92df48106c4ca77dccb5bde05ab79690359df689b7da67a05'
[INTACT] dir inode= 2786051 group= 340 '9b91d1f5800648b1611e9398703d2ed6b5c15ba100e21a57e339bb9ce8b9451f'
[INTACT] 4 inode= 1572872 group= 192 'backingFsBlockDev'
[INTACT] dir inode= 4195616 group= 512 'bracket_bracket_pg_data'
[INTACT] dir inode= 6456408 group= 788 'joomla_db_data'
[INTACT] dir inode= 6456224 group= 788 'joomla_joomla_data'
[INTACT] file inode= 1585920 group= 193 'metadata.db'

49842
test/orphaned_inodes.txt Normal file

File diff suppressed because one or more lines are too long

24939
test/orphans.txt Normal file

File diff suppressed because one or more lines are too long

38
test/patch.py Normal file
View File

@@ -0,0 +1,38 @@
import struct
# One-shot, in-place patcher for a copied ext4 metadata image.
# Clears the checksum/journal feature bits in the primary superblock and
# zeroes every group-descriptor checksum so recovery tools accept the image.
with open('/tmp/fs_meta.img', 'r+b') as f:
    # Patch primary superblock at byte 1024
    f.seek(1024)
    sb = bytearray(f.read(1024))
    ro_compat = struct.unpack_from('<I', sb, 100)[0]  # s_feature_ro_compat
    incompat = struct.unpack_from('<I', sb, 96)[0]    # s_feature_incompat
    print(f'ro_compat before: 0x{ro_compat:08x}')
    print(f'incompat before: 0x{incompat:08x}')
    ro_compat &= ~0x400  # clear metadata_csum
    ro_compat &= ~0x010  # clear gdt_csum
    # NOTE(review): incompat bit 0x004 is EXT4_FEATURE_INCOMPAT_RECOVER
    # ("journal needs replay"), not has_journal — clearing it skips recovery.
    incompat &= ~0x004  # clear has_journal
    struct.pack_into('<I', sb, 100, ro_compat)
    struct.pack_into('<I', sb, 96, incompat)
    struct.pack_into('<I', sb, 1020, 0)  # clear sb checksum (s_checksum)
    f.seek(1024)
    f.write(bytes(sb))
    print(f'ro_compat after: 0x{ro_compat:08x}')
    print(f'incompat after: 0x{incompat:08x}')
    # Also patch every GDT entry checksum to 0
    # GDT starts at byte 4096, each entry is 64 bytes
    # checksum is at offset 30 within each entry
    NUM_GROUPS = 35728
    f.seek(4096)
    gdt = bytearray(f.read(NUM_GROUPS * 64))
    for g in range(NUM_GROUPS):
        struct.pack_into('<H', gdt, g*64+30, 0)
    f.seek(4096)
    f.write(bytes(gdt))
    print(f'Zeroed checksums for {NUM_GROUPS} GDT entries')
    print('Done')

38
test/patch2.py Normal file
View File

@@ -0,0 +1,38 @@
import struct
# In-place patcher for a copied ext4 metadata image (/tmp/fs_meta.img):
# clears journal/checksum feature bits, zeroes s_journal_inum and the
# superblock checksum, then zeroes every group-descriptor checksum.
with open('/tmp/fs_meta.img', 'r+b') as f:
    # ---- primary superblock at byte 1024 ----
    f.seek(1024)
    sb = bytearray(f.read(1024))
    ro_compat = struct.unpack_from('<I', sb, 100)[0]  # s_feature_ro_compat
    incompat = struct.unpack_from('<I', sb, 96)[0]    # s_feature_incompat
    compat = struct.unpack_from('<I', sb, 92)[0]      # s_feature_compat
    print(f'Before: compat=0x{compat:08x} incompat=0x{incompat:08x} ro_compat=0x{ro_compat:08x}')
    incompat &= ~0x004   # clear INCOMPAT_RECOVER (journal replay needed)
    ro_compat &= ~0x400  # clear METADATA_CSUM
    ro_compat &= ~0x010  # clear GDT_CSUM
    compat &= ~0x004     # clear COMPAT_HAS_JOURNAL
    # Zero the journal inode number so e2fsck ignores it.
    # FIX: s_journal_inum is at offset 0xE0 = 224; the previous offset (180)
    # landed inside s_last_mounted and never touched the journal inode.
    struct.pack_into('<I', sb, 224, 0)  # s_journal_inum = 0
    struct.pack_into('<I', sb, 92, compat)
    struct.pack_into('<I', sb, 96, incompat)
    struct.pack_into('<I', sb, 100, ro_compat)
    struct.pack_into('<I', sb, 1020, 0)  # clear s_checksum
    f.seek(1024)
    f.write(bytes(sb))
    print(f'After: compat=0x{compat:08x} incompat=0x{incompat:08x} ro_compat=0x{ro_compat:08x}')
# Zero all GDT checksums (GDT at byte 4096, 64-byte entries, csum at +30).
with open('/tmp/fs_meta.img', 'r+b') as f:
    f.seek(4096)
    gdt = bytearray(f.read(35728 * 64))
    for g in range(35728):
        struct.pack_into('<H', gdt, g*64+30, 0)
    f.seek(4096)
    f.write(bytes(gdt))
    print('GDT checksums zeroed')

40
test/patch3.sh Normal file
View File

@@ -0,0 +1,40 @@
# Patch group 0 of the merged GDT in place, then restart the v9 NBD server
# and re-run a read-only fsck to check the result.
#
# NOTE(review): in a 64-byte ext4 group descriptor, bg_inode_bitmap_csum_hi
# sits at offset 58; offset 50 is bg_itable_unused_hi.  The `off+50` write in
# the embedded script below may therefore clear the unused-inodes high word
# rather than the checksum high word — verify against the ext4 disk layout.
python3 -c "
import struct
data = bytearray(open('/tmp/merged_gdt.bin','rb').read())
# Fix group 0 specifically
# It has flags=0x0004 (INODE_ZEROED) but missing INODE_UNINIT
# and has non-zero ib_csum=0x9f37 which won't match zeroed bitmap
g = 0
off = 0  # group 0 offset in GDT
flags = struct.unpack_from('<H', data, off+18)[0]
print(f'Group 0 flags before: 0x{flags:04x}')
# Set INODE_UNINIT (0x0002) so libext2fs skips inode bitmap validation
flags |= 0x0002
struct.pack_into('<H', data, off+18, flags)
# Zero the inode bitmap checksum to match actual zeroed data
struct.pack_into('<H', data, off+26, 0) # ib_csum_lo
struct.pack_into('<H', data, off+50, 0) # ib_csum_hi
print(f'Group 0 flags after: 0x{flags:04x}')
print(f'Group 0 ib_csum now: 0x0000')
with open('/tmp/merged_gdt.bin','wb') as f:
    f.write(data)
print('Saved')
"
# Restart server
pkill -f nbd_server
nbd-client -d /dev/nbd0 2>/dev/null   # detach any stale client first
sleep 1
python3 nbd_server_v9.py &
sleep 2
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
e2fsck -n /dev/nbd0 2>&1 | head -20   # read-only check, first 20 lines only

41
test/patch4.py Normal file
View File

@@ -0,0 +1,41 @@
# Recompute the ext4 group descriptor checksum for group 0 (its flags
# were modified by patch3) and write the fixed GDT image back out.
import struct, binascii
# We need to recompute the GDT checksum for group 0
# after modifying its flags
#
# ext4 GDT checksum = crc32c(uuid + group_num_le16 + gdt_entry_with_csum_zeroed)
# The checksum seed is stored in the superblock at offset 408
with open('/dev/nbd0','rb') as f:
    f.seek(1024)
    sb = f.read(1024)
# s_uuid lives at superblock offset 104 (16 bytes).
uuid = sb[104:120]
csum_seed = struct.unpack_from('<I', sb, 408)[0]
print(f'UUID: {uuid.hex()}')
print(f'csum_seed: 0x{csum_seed:08x}')
# Install crcmod for crc32c
import crcmod
crc32c_fn = crcmod.predefined.mkCrcFun('crc-32c')
data = bytearray(open('/tmp/merged_gdt.bin','rb').read())
def compute_gdt_csum(g, entry):
    # CRC over uuid + little-endian group number + the descriptor with
    # its own checksum field zeroed; only the low 16 bits are stored.
    e = bytearray(entry)
    struct.pack_into('<H', e, 30, 0) # zero checksum field
    grp_le = struct.pack('<H', g)
    csum_data = uuid + grp_le + bytes(e)
    csum = crc32c_fn(csum_data, csum_seed)
    return csum & 0xFFFF
# Recompute checksum for group 0 only
g = 0
entry = data[0:64]
csum = compute_gdt_csum(g, entry)
struct.pack_into('<H', data, 30, csum)
print(f'Group 0 new checksum: 0x{csum:04x}')
with open('/tmp/merged_gdt.bin','wb') as f:
    f.write(data)
print('Saved')

61
test/patch4.sh Normal file
View File

@@ -0,0 +1,61 @@
# patch4.sh - recompute group 0's descriptor checksum (crc32c over
# uuid + group number + descriptor), restart the NBD server, then run
# a full read-only e2fsck and attempt a read-only mount.
python3 -c "
import struct, binascii
# We need to recompute the GDT checksum for group 0
# after modifying its flags
#
# ext4 GDT checksum = crc32c(uuid + group_num_le16 + gdt_entry_with_csum_zeroed)
# The checksum seed is stored in the superblock at offset 408
with open('/dev/nbd0','rb') as f:
    f.seek(1024)
    sb = f.read(1024)
uuid = sb[104:120]
csum_seed = struct.unpack_from('<I', sb, 408)[0]
print(f'UUID: {uuid.hex()}')
print(f'csum_seed: 0x{csum_seed:08x}')
# Install crcmod for crc32c
import subprocess
subprocess.run(['pip','install','crcmod','--break-system-packages','-q'])
import crcmod
crc32c_fn = crcmod.predefined.mkCrcFun('crc-32c')
data = bytearray(open('/tmp/merged_gdt.bin','rb').read())
def compute_gdt_csum(g, entry):
    e = bytearray(entry)
    struct.pack_into('<H', e, 30, 0) # zero checksum field
    grp_le = struct.pack('<H', g)
    csum_data = uuid + grp_le + bytes(e)
    csum = crc32c_fn(csum_data, csum_seed)
    return csum & 0xFFFF
# Recompute checksum for group 0 only
g = 0
entry = data[0:64]
csum = compute_gdt_csum(g, entry)
struct.pack_into('<H', data, 30, csum)
print(f'Group 0 new checksum: 0x{csum:04x}')
with open('/tmp/merged_gdt.bin','wb') as f:
    f.write(data)
print('Saved')
"
# Restart and test
pkill -f nbd_server
nbd-client -d /dev/nbd0 2>/dev/null
sleep 1
python3 nbd_server_v9.py &
sleep 2
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
# Full e2fsck read-only check
e2fsck -n /dev/nbd0 2>&1 | tee /tmp/e2fsck_full.log
tail -5 /tmp/e2fsck_full.log
# Try mounting
# norecovery skips journal replay so the read-only mount can proceed.
mount -o ro,norecovery -t ext4 /dev/nbd0 /mnt/root
ls /mnt/root

76
test/patch5.sh Normal file
View File

@@ -0,0 +1,76 @@
# patch5.sh - for the 13 zeroed block groups, recompute bitmap and
# descriptor checksums from the actual on-disk (zeroed) bitmaps so the
# merged GDT validates, then restart the NBD server and re-check,
# finally attempting an ext4magic carve of recent files.
pip install crcmod --break-system-packages -q
python3 -c "
import struct, crcmod
crc32c = crcmod.predefined.mkCrcFun('crc-32c')
with open('/dev/nbd0','rb') as f:
    f.seek(1024)
    sb = f.read(1024)
uuid = sb[104:120]
csum_seed = struct.unpack_from('<I', sb, 408)[0]
BSIZE = 4096
def compute_bitmap_csum(bitmap_data):
    return crc32c(uuid + bitmap_data, csum_seed) & 0xFFFF
def compute_gdt_csum(g, entry):
    e = bytearray(entry)
    struct.pack_into('<H', e, 30, 0)
    return crc32c(uuid + struct.pack('<H', g) + bytes(e), csum_seed) & 0xFFFF
data = bytearray(open('/tmp/merged_gdt.bin','rb').read())
print('Fixing groups 0-12 only (zeroed by fast initialization)...')
with open('/dev/nbd0','rb') as f:
    for g in range(13):
        e = data[g*64:(g+1)*64]
        # Read actual inode bitmap (will be zeros for damaged groups)
        ib_block = struct.unpack_from('<I', e, 4)[0]
        f.seek(ib_block * BSIZE)
        ib_data = f.read(BSIZE)
        # Read actual block bitmap
        bb_block = struct.unpack_from('<I', e, 0)[0]
        f.seek(bb_block * BSIZE)
        bb_data = f.read(BSIZE)
        # Compute correct checksums from actual bitmap data
        ib_csum = compute_bitmap_csum(ib_data)
        bb_csum = compute_bitmap_csum(bb_data)
        struct.pack_into('<H', data, g*64+26, ib_csum) # ib_csum_lo
        struct.pack_into('<H', data, g*64+50, 0) # ib_csum_hi
        struct.pack_into('<H', data, g*64+24, bb_csum) # bb_csum_lo
        struct.pack_into('<H', data, g*64+48, 0) # bb_csum_hi
        # Recompute GDT entry checksum
        gdt_csum = compute_gdt_csum(g, data[g*64:(g+1)*64])
        struct.pack_into('<H', data, g*64+30, gdt_csum)
        print(f' Group {g:2d}: ib_csum=0x{ib_csum:04x} '
              f'bb_csum=0x{bb_csum:04x} gdt_csum=0x{gdt_csum:04x}')
with open('/tmp/merged_gdt.bin','wb') as f:
    f.write(data)
print('Saved')
"
# Restart the NBD server so the patched GDT is served.
pkill -f nbd_server
nbd-client -d /dev/nbd0 2>/dev/null
sleep 1
python3 nbd_server_v9.py &
sleep 2
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""
e2fsck -n /dev/nbd0 2>&1 | head -10
# Carve deleted files modified after 2023-01-01 into /mnt/recovered.
ext4magic /dev/nbd0 \
    -s 4096 -n 32768 \
    -M \
    -a $(date -d "2023-01-01" +%s) \
    -d /mnt/recovered \
    2>&1 | tee /tmp/ext4magic.log

54
test/patch_gdt.py Normal file
View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
Disable the metadata_csum feature in the superblock and wipe the
per-group descriptor checksums for the damaged groups 0-12.
All writes go to dm-0 (overlay), nbd0 untouched.
"""
import struct

DEV = '/dev/dm-0'
BLOCK = 4096
BACKUP_SB_BLOCK = 32768
# Feature flag constants
INCOMPAT_64BIT = 0x80
RO_COMPAT_METADATA_CSUM = 0x400

with open(DEV, 'r+b') as dev:
    # The primary superblock copy was already patched into block 0; it
    # still sits 1024 bytes into the device.
    dev.seek(1024)
    sb = bytearray(dev.read(1024))
    ro_compat = struct.unpack_from('<I', sb, 100)[0]
    print(f"ro_compat_features: {ro_compat:#010x}")
    print(f"metadata_csum set: {bool(ro_compat & RO_COMPAT_METADATA_CSUM)}")
    # Drop metadata_csum and invalidate the stored superblock checksum.
    struct.pack_into('<I', sb, 100, ro_compat & ~RO_COMPAT_METADATA_CSUM)
    struct.pack_into('<I', sb, 1020, 0)
    dev.seek(1024)
    dev.write(sb)
    print("Patched superblock: metadata_csum cleared")
    # bg_checksum sits at offset 30 of each 64-byte descriptor; clear
    # it for the first 13 groups (0-12 were zeroed on disk).
    dev.seek(BLOCK)  # GDT starts at block 1
    gdt = bytearray(dev.read(13 * 64))
    for grp in range(13):
        base = grp * 64
        old_csum = struct.unpack_from('<H', gdt, base + 30)[0]
        struct.pack_into('<H', gdt, base + 30, 0)
        print(f" group {grp:2d}: cleared checksum {old_csum:#06x}")
    dev.seek(BLOCK)
    dev.write(gdt)
    print("Patched GDT checksums for groups 0-12")
    print("Done - try debugfs again")

5
test/rdump_all.sh Normal file
View File

@@ -0,0 +1,5 @@
# Dump every recovered root inode listed in true_roots.txt (one
# "<inode> <description>" per line) into its own output directory,
# running all dump_tree.py extractions in parallel.
# read -r: keep backslashes in the input literal; quote expansions so
# odd inode tokens cannot word-split or glob.
while read -r inum rest; do
    mkdir -p "/mnt/recovered/apr29/${inum}"
    python3.12 dump_tree.py "${inum}" "/mnt/recovered/apr29/${inum}/" &
done < true_roots.txt
# Wait for all background extractions to finish before exiting.
wait

306
test/rebuild.py Normal file
View File

@@ -0,0 +1,306 @@
#!/usr/bin/env python3
"""
Full filesystem extraction using TSK ils + fls + icat.
Strategy:
1. ils - get every allocated inode
2. For each inode, determine if file or directory
3. Build directory tree bottom-up using parent pointers (..)
4. Extract everything, place orphans in /orphans/<inode>
"""
import subprocess, os, sys, struct, collections
# Recovery parameters: source device, output root, and the ext4
# geometry of the damaged filesystem.
DEVICE = '/dev/nbd0'
OUTDIR = '/mnt/recovered'
IPG = 8192  # inodes per block group
MIN_GOOD_GROUP = 13 # groups 0-12 are zeroed
def run(cmd, timeout=600):
    """Run *cmd*, returning its (stdout, stderr) as text.

    Best-effort: a timeout or launch failure is reported through the
    stderr slot instead of raising.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True,
                              text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return '', 'timeout'
    except Exception as exc:
        return '', str(exc)
    return proc.stdout, proc.stderr
def run_binary(cmd, timeout=600):
    """Run *cmd* and return its raw stdout bytes.

    Returns b'' if the command cannot be launched, times out, or fails
    in any other way (best-effort, mirroring run()).
    """
    try:
        r = subprocess.run(cmd, capture_output=True, timeout=timeout)
        return r.stdout
    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit,
    # making long extractions impossible to abort; catch only errors.
    except Exception:
        return b''
# ── Phase 1: get all allocated inodes via ils ─────────────────────────────────
def get_all_inodes():
    """Enumerate allocated inodes with TSK's ils.

    Returns {inode: {'type': 'f'/'d'/'l'/'o', 'size': int, 'mtime': int}}
    covering allocated inodes in intact groups (>= MIN_GOOD_GROUP),
    skipping the reserved inodes (<= 11).
    """
    print('Running ils to enumerate all allocated inodes...')
    print('(This may take 30-60 minutes for a 4.4TB filesystem)')
    stdout, _ = run(['ils', '-e', DEVICE], timeout=7200)
    inodes = {}  # inode -> {'type': 'f'/'d', 'size': n, 'mtime': n}
    for line in stdout.splitlines():
        # Skip the header (starts with '|') and blank lines.
        if line.startswith('|') or not line.strip():
            continue
        try:
            # ils -e format:
            # inode|alloc|uid|gid|mtime|atime|ctime|dtime|mode|nlink|size|...
            fields = line.split('|')
            ino = int(fields[0])
            alloc = fields[1]  # 'a' = allocated, 'f' = free
            mode = int(fields[8]) if len(fields) > 8 else 0
            size = int(fields[10]) if len(fields) > 10 else 0
            mtime = int(fields[4]) if len(fields) > 4 else 0
            if alloc != 'a':
                continue
            if ino <= 11:
                continue
            grp = (ino - 1) // IPG
            if grp < MIN_GOOD_GROUP:
                continue
            # Determine type from mode
            ftype = (mode & 0o170000)
            if ftype == 0o040000:
                t = 'd'
            elif ftype == 0o100000:
                t = 'f'
            elif ftype == 0o120000:
                t = 'l'
            else:
                t = 'o'  # other
            inodes[ino] = {'type': t, 'size': size, 'mtime': mtime}
        except (ValueError, IndexError):
            # Malformed line: ignore and keep scanning.
            continue
    print(f'Found {len(inodes)} allocated inodes in intact groups')
    dirs = sum(1 for v in inodes.values() if v['type'] == 'd')
    files = sum(1 for v in inodes.values() if v['type'] == 'f')
    print(f' Directories: {dirs}')
    print(f' Files: {files}')
    return inodes
# ── Phase 2: build directory tree using fls ───────────────────────────────────
def build_tree(dir_inodes):
    """
    For each directory inode, run fls to get its contents.
    Build a map of inode -> (parent_inode, name).
    """
    # NOTE(review): inode_path actually stores just the parent inode,
    # not a (parent, name) tuple as the docstring suggests.
    print(f'\nBuilding directory tree from {len(dir_inodes)} directory inodes...')
    # inode -> (parent_inode, name)
    inode_path = {}
    # inode -> [(child_inode, name, type)]
    inode_children = collections.defaultdict(list)
    processed = 0
    for dir_ino in dir_inodes:
        stdout, _ = run(['fls', DEVICE, str(dir_ino)], timeout=30)
        parent_ino = None
        for line in stdout.splitlines():
            try:
                # fls lines look like "<type> <inode>: <name>"; deleted
                # entries carry a leading '*' on the inode field.
                parts = line.split(None, 2)
                if len(parts) < 3: continue
                type_str = parts[0]
                ino_str = parts[1].rstrip(':').lstrip('*')
                name = parts[2].strip()
                ino = int(ino_str)
                etype = type_str[0]
                if name == '..':
                    # Record the parent pointer, not a child entry.
                    parent_ino = ino
                    continue
                if name == '.':
                    continue
                inode_children[dir_ino].append((ino, name, etype))
            except (ValueError, IndexError):
                continue
        if parent_ino is not None:
            inode_path[dir_ino] = parent_ino
        processed += 1
        if processed % 1000 == 0:
            print(f' Processed {processed}/{len(dir_inodes)} directories...',
                  flush=True)
    return inode_path, inode_children
# ── Phase 3: resolve paths ────────────────────────────────────────────────────
def resolve_paths(inode_path, inode_children, all_inodes):
    """
    Walk parent pointers to build full paths for each directory.
    Directories whose parent chain leads to a lost inode go to /orphans/.
    """
    # NOTE(review): all_inodes is accepted for signature symmetry with
    # the other phases but is not used here.
    print('\nResolving full paths...')
    # resolved_dirs: inode -> full path string
    resolved = {}
    def get_path(ino, depth=0):
        # Memoised recursive walk up the '..' chain; results (including
        # orphan placements) are cached in `resolved`.
        if depth > 50:  # cycle protection
            return None
        if ino in resolved:
            return resolved[ino]
        parent = inode_path.get(ino)
        if parent is None or parent == ino:
            # Root or unknown parent
            path = f'orphans/dir_{ino}'
            resolved[ino] = path
            return path
        grp = (parent - 1) // IPG
        if grp < MIN_GOOD_GROUP:
            # Parent is in lost group — this is an orphan root
            # Try to find the directory name from the parent's children
            # We can't — parent inode is gone
            path = f'orphans/dir_{ino}'
            resolved[ino] = path
            return path
        parent_path = get_path(parent, depth + 1)
        if parent_path is None:
            path = f'orphans/dir_{ino}'
        else:
            # Find our name in parent's children
            name = f'inode_{ino}'
            for child_ino, child_name, _ in inode_children.get(parent, []):
                if child_ino == ino:
                    name = child_name
                    break
            path = os.path.join(parent_path, name)
        resolved[ino] = path
        return path
    for ino in inode_path:
        get_path(ino)
    return resolved
# ── Phase 4: extract ──────────────────────────────────────────────────────────
def extract_all(resolved_dirs, inode_children, all_inodes):
    """Copy file contents out of the filesystem with icat.

    First extracts everything reachable from the resolved directory
    tree (regular files and symlinks), then sweeps remaining allocated
    regular files into orphans/files/<inode>.  Returns counters
    {'ok', 'err', 'bytes'}.
    """
    print(f'\nExtracting files...')
    stats = {'ok': 0, 'err': 0, 'bytes': 0}
    extracted = set()
    # Extract files reachable from directory tree
    for dir_ino, dir_path in resolved_dirs.items():
        abs_dir = os.path.join(OUTDIR, dir_path)
        os.makedirs(abs_dir, exist_ok=True)
        for child_ino, name, etype in inode_children.get(dir_ino, []):
            if child_ino in extracted:
                continue
            outpath = os.path.join(abs_dir, name)
            if etype == 'r':
                try:
                    os.makedirs(abs_dir, exist_ok=True)
                    with open(outpath, 'wb') as f:
                        subprocess.run(
                            ['icat', DEVICE, str(child_ino)],
                            stdout=f, stderr=subprocess.DEVNULL,
                            timeout=600
                        )
                    size = os.path.getsize(outpath)
                    stats['ok'] += 1
                    stats['bytes'] += size
                    extracted.add(child_ino)
                    if stats['ok'] % 100 == 0:
                        print(f' {stats["ok"]} files extracted, '
                              f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)
                except Exception as e:
                    stats['err'] += 1
            elif etype == 'l':
                # Symlink: icat returns the target path as the content.
                try:
                    r = subprocess.run(
                        ['icat', DEVICE, str(child_ino)],
                        capture_output=True, timeout=10
                    )
                    target = r.stdout.decode('utf-8', errors='replace').strip()
                    if target:
                        if os.path.lexists(outpath): os.remove(outpath)
                        os.symlink(target, outpath)
                        extracted.add(child_ino)
                        stats['ok'] += 1
                except:
                    stats['err'] += 1
    # Extract orphaned files (allocated but not in any directory)
    print(f'\nExtracting orphaned files...')
    orphan_dir = os.path.join(OUTDIR, 'orphans', 'files')
    os.makedirs(orphan_dir, exist_ok=True)
    for ino, info in all_inodes.items():
        if ino in extracted: continue
        if info['type'] != 'f': continue
        if info['size'] == 0: continue
        outpath = os.path.join(orphan_dir, str(ino))
        try:
            with open(outpath, 'wb') as f:
                subprocess.run(
                    ['icat', DEVICE, str(ino)],
                    stdout=f, stderr=subprocess.DEVNULL,
                    timeout=600
                )
            size = os.path.getsize(outpath)
            if size > 0:
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(ino)
            else:
                # Empty extraction: drop the placeholder file.
                os.remove(outpath)
        except:
            stats['err'] += 1
    return stats
def main():
    """Drive the four phases: enumerate, build tree, resolve, extract."""
    os.makedirs(OUTDIR, exist_ok=True)
    print(f'Device : {DEVICE}')
    print(f'Output : {OUTDIR}')
    print()
    # Phase 1: enumerate all inodes
    all_inodes = get_all_inodes()
    if not all_inodes:
        print('ERROR: ils returned no inodes - is NBD server running?')
        sys.exit(1)
    dir_inodes = [ino for ino, info in all_inodes.items()
                  if info['type'] == 'd']
    # Phase 2: build tree
    inode_path, inode_children = build_tree(dir_inodes)
    # Phase 3: resolve paths
    resolved_dirs = resolve_paths(inode_path, inode_children, all_inodes)
    intact = sum(1 for p in resolved_dirs.values()
                 if not p.startswith('orphans'))
    orphaned = sum(1 for p in resolved_dirs.values()
                   if p.startswith('orphans'))
    print(f'Directories with resolved paths: {intact}')
    print(f'Orphaned directories: {orphaned}')
    # Phase 4: extract
    stats = extract_all(resolved_dirs, inode_children, all_inodes)
    print()
    print('=== COMPLETE ===')
    print(f'Files OK: {stats["ok"]}')
    print(f'Files ERR: {stats["err"]}')
    print(f'Total data: {stats["bytes"]/1024**3:.2f} GB')
    print(f'Output: {OUTDIR}')
if __name__ == '__main__':
    main()

207
test/recursivedump.py Normal file
View File

@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Recursive ext4 directory dumper by inode number.
Bypasses all metadata validation - uses extent trees directly.
"""
import struct, os, sys, stat
from pathlib import Path
# Device and geometry: all reads go through the dm overlay so the
# original device stays untouched.
DEV = '/dev/dm-0'
BLOCK = 4096  # filesystem block size in bytes
BACKUP_SB_BLOCK = 32768  # block number of the first backup superblock
# ── low-level helpers ────────────────────────────────────────────────────────
def read_at(f, offset, size):
    """Return *size* bytes read from absolute byte *offset* of *f*."""
    f.seek(offset)
    return f.read(size)
def parse_superblock(data):
    """Decode the handful of ext4 superblock fields this tool needs.

    *data* is the raw 1024-byte superblock.  desc_size falls back to
    32 (the ext2/3 descriptor size) when the stored value is zero.
    """
    layout = [
        ('inodes_count', '<I', 0),
        ('blocks_count', '<I', 4),
        ('blocks_per_group', '<I', 32),
        ('inodes_per_group', '<I', 40),
        ('inode_size', '<H', 88),
        ('magic', '<H', 56),
        ('desc_size', '<H', 254),
    ]
    sb = {name: struct.unpack_from(fmt, data, off)[0]
          for name, fmt, off in layout}
    if not sb['desc_size']:
        sb['desc_size'] = 32
    return sb
def parse_gdt_entry(gdt_data, offset, desc_size):
    """Return the inode-table block number from the group descriptor at
    *offset*, combining the high word when 64-bit descriptors are used."""
    table_lo = struct.unpack_from('<I', gdt_data, offset + 8)[0]
    if desc_size < 64:
        return table_lo  # 32-byte descriptors carry no high word
    table_hi = struct.unpack_from('<I', gdt_data, offset + 40)[0]
    return (table_hi << 32) | table_lo
def parse_extent_tree(data, inode_offset):
    """Parse the in-inode extent header at i_block (inode offset + 40).

    Returns a list of (logical_block, physical_block, length) tuples.
    Only the four extents that fit inside the inode are examined; for
    interior nodes (depth > 0) the first child block is returned as a
    single pseudo-extent so callers can keep walking large trees.
    """
    hdr = inode_offset + 40
    magic, count, _, depth = struct.unpack_from('<HHHH', data, hdr)
    if magic != 0xF30A:  # EXT4_EXT_MAGIC
        return []
    if depth:
        # Index node: follow only the first ei_leaf pointer.
        idx = hdr + 12
        leaf_lo = struct.unpack_from('<I', data, idx + 4)[0]
        leaf_hi = struct.unpack_from('<H', data, idx + 8)[0]
        return [(0, (leaf_hi << 32) | leaf_lo, 1)]
    found = []
    for slot in range(min(count, 4)):
        rec = hdr + 12 + slot * 12
        if rec + 12 > len(data):
            break
        logical = struct.unpack_from('<I', data, rec)[0]
        ee_len = struct.unpack_from('<H', data, rec + 4)[0]
        hi = struct.unpack_from('<H', data, rec + 6)[0]
        lo = struct.unpack_from('<I', data, rec + 8)[0]
        start = (hi << 32) | lo
        if start > 0:
            # Mask off the "unwritten extent" bit in the length field.
            found.append((logical, start, ee_len & 0x7FFF))
    return found
def read_inode(f, sb, gdt_data, inum):
    """Return raw inode block data and offset within it."""
    # Locate the owning group's inode table via its descriptor, then
    # index to the inode's slot; the whole 4 KiB block containing the
    # inode is returned together with the inode's byte offset in it.
    grp = (inum - 1) // sb['inodes_per_group']
    local_idx = (inum - 1) % sb['inodes_per_group']
    tbl_block = parse_gdt_entry(gdt_data, grp * sb['desc_size'], sb['desc_size'])
    byte_off = local_idx * sb['inode_size']
    blk_off = byte_off // BLOCK
    slot = byte_off % BLOCK
    data = read_at(f, (tbl_block + blk_off) * BLOCK, BLOCK)
    return data, slot
def read_dir_entries(f, sb, gdt_data, inum):
    """Return dict of name -> (child_inum, ftype)."""
    idata, slot = read_inode(f, sb, gdt_data, inum)
    entries = {}
    # Walk every data block referenced by the directory's extents and
    # decode classic linear dirents (inode, rec_len, name_len, ftype).
    for _, phys, length in parse_extent_tree(idata, slot):
        for blk in range(length):
            try:
                bdata = read_at(f, (phys + blk) * BLOCK, BLOCK)
                offset = 0
                while offset < BLOCK - 8:
                    e_ino, rec_len, name_len, ftype = \
                        struct.unpack_from('<IHBB', bdata, offset)
                    if rec_len < 8 or offset + rec_len > BLOCK:
                        # Corrupt record length: abandon this block.
                        break
                    if e_ino != 0 and name_len > 0:
                        name = bdata[offset+8:offset+8+name_len]\
                            .decode('utf-8', errors='replace')
                        entries[name] = (e_ino, ftype)
                    offset += rec_len
            except OSError:
                # Unreadable block: skip it, keep the rest.
                pass
    return entries
def dump_file(f, sb, gdt_data, inum, dest_path):
    """Extract a regular file by inode to dest_path."""
    # Reads i_size (lo at +4, hi at +108) and streams each extent's
    # blocks out, truncating the final block to the exact size.
    # NOTE(review): extents are written back-to-back in sorted logical
    # order; a sparse file (holes between extents) would come out
    # shifted — acceptable for this recovery tool, but verify if exact
    # layouts matter.
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size_lo = struct.unpack_from('<I', idata, slot + 4)[0]
        size_hi = struct.unpack_from('<I', idata, slot + 108)[0]
        size = size_lo | (size_hi << 32)
        extents = parse_extent_tree(idata, slot)
        # Check for inline data (EXT4_INLINE_DATA_FL = 0x10000000)
        flags = struct.unpack_from('<I', idata, slot + 32)[0]
        if flags & 0x10000000:
            # Data stored in inode body - skip for now
            return False
        written = 0
        with open(dest_path, 'wb') as out:
            for _, phys, length in sorted(extents):
                for blk in range(length):
                    if written >= size:
                        break
                    chunk = read_at(f, (phys + blk) * BLOCK, BLOCK)
                    remaining = size - written
                    out.write(chunk[:remaining] if remaining < BLOCK else chunk)
                    written += min(BLOCK, remaining)
        return True
    except OSError:
        return False
def dump_symlink(f, sb, gdt_data, inum, dest_path):
    """Extract symlink target."""
    try:
        idata, slot = read_inode(f, sb, gdt_data, inum)
        size = struct.unpack_from('<I', idata, slot + 4)[0]
        if size <= 60:
            # Fast symlink - target in inode block area
            target = idata[slot+40:slot+40+size].decode('utf-8', errors='replace')
        else:
            # Slow symlink: the target lives in the first data block.
            extents = parse_extent_tree(idata, slot)
            if not extents:
                return False
            bdata = read_at(f, extents[0][1] * BLOCK, BLOCK)
            target = bdata[:size].decode('utf-8', errors='replace')
        os.symlink(target, dest_path)
        return True
    except (OSError, IndexError):
        # os.symlink fails if dest already exists; treat as not dumped.
        return False
# ── recursive dumper ─────────────────────────────────────────────────────────
# ext4 dirent file-type codes (ext4_dir_entry_2.file_type).
FTYPE_REG = 1
FTYPE_DIR = 2
FTYPE_SYM = 7
def dump_tree(f, sb, gdt_data, inum, dest_dir, depth=0, visited=None):
    """Recursively extract directory inode *inum* into *dest_dir*.

    *visited* guards against inode cycles; entry names are sanitised
    so they cannot escape the destination directory.
    """
    if visited is None:
        visited = set()
    if inum in visited:
        return
    visited.add(inum)
    try:
        entries = read_dir_entries(f, sb, gdt_data, inum)
    except Exception:
        return
    os.makedirs(dest_dir, exist_ok=True)
    for name, (child_inum, ftype) in entries.items():
        if name in ('.', '..'):
            continue
        # Sanitise name
        safe_name = name.replace('/', '_').replace('\x00', '')
        dest = os.path.join(dest_dir, safe_name)
        try:
            if ftype == FTYPE_DIR:
                dump_tree(f, sb, gdt_data, child_inum, dest, depth+1, visited)
            elif ftype == FTYPE_REG:
                dump_file(f, sb, gdt_data, child_inum, dest)
            elif ftype == FTYPE_SYM:
                dump_symlink(f, sb, gdt_data, child_inum, dest)
        except Exception as e:
            print(f" WARN: {dest}: {e}", file=sys.stderr)
# ── main ─────────────────────────────────────────────────────────────────────
def main():
    """CLI entry point: usage <inode> <dest_dir>."""
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <inode> <dest_dir>")
        sys.exit(1)
    root_inum = int(sys.argv[1])
    dest_dir = sys.argv[2]
    with open(DEV, 'rb') as f:
        # Use the backup superblock — the primary area is damaged.
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = parse_superblock(sb_data)
        assert sb['magic'] == 0xef53
        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        # The GDT copy follows the backup superblock immediately.
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * sb['desc_size'])
        print(f"Dumping inode {root_inum} -> {dest_dir}")
        dump_tree(f, sb, gdt_data, root_inum, dest_dir)
    print("Done")
if __name__ == '__main__':
    main()

227
test/rescue.sh Normal file
View File

@@ -0,0 +1,227 @@
# rescue.sh - write the tree-reconstruction script to /tmp, run it and
# capture its output.  The heredoc delimiter is quoted, so nothing in
# the Python source is expanded by the shell.
cat > /tmp/reconstruct_tree.py << 'EOF'
#!/usr/bin/env python3
"""
Reconstruct full directory tree from inode tables.
Attaches orphaned subtrees to lost+found.
Extracts everything using icat/debugfs.
"""
import struct, os, subprocess, collections
DEVICE = '/dev/nbd0'
OUTDIR = '/mnt/recovered/reconstructed'
BSIZE = 4096
IPG = 8192
INODE_SZ = 256
NUM_GROUPS = 35728
MIN_GROUP = 13  # groups 0-12 are zeroed on disk
# inode -> parent_inode (from .. entry)
parent_of = {}
# inode -> [(child_inode, name, ftype)]
children = collections.defaultdict(list)
# inode -> name (as seen from parent's directory block)
inode_name = {}
# all directory inodes found
dir_inodes = set()
def parse_extents(inode_data):
    """Expand the inode's in-line extent tree into physical block numbers.

    Depth-0 trees only.  Each extent contributes at most its first 8
    blocks (enough for directory scanning), and absurd lengths (>1024)
    are treated as corruption and skipped.
    """
    magic = struct.unpack_from('<H', inode_data, 40)[0]
    if magic != 0xf30a:
        return []
    n_entries = struct.unpack_from('<H', inode_data, 42)[0]
    depth = struct.unpack_from('<H', inode_data, 46)[0]
    if depth != 0:
        return []
    out = []
    for idx in range(min(n_entries, 4)):
        base = 52 + idx * 12
        length = struct.unpack_from('<H', inode_data, base + 4)[0]
        hi = struct.unpack_from('<H', inode_data, base + 6)[0]
        lo = struct.unpack_from('<I', inode_data, base + 8)[0]
        start = (hi << 32) | lo
        if length > 1024:
            continue
        out.extend(start + b for b in range(min(length, 8)))
    return out
def read_dirents(f, inode_data):
    """Decode linear dirents from every data block of a directory inode.

    Returns a list of (inode, name, ftype) tuples; unreadable blocks
    are skipped silently.
    """
    entries = []
    for blk in parse_extents(inode_data):
        try:
            f.seek(blk * BSIZE)
            data = f.read(BSIZE)
        except OSError:
            continue
        off = 0
        while off < BSIZE - 8:
            ino = struct.unpack_from('<I', data, off)[0]
            rec_len = struct.unpack_from('<H', data, off+4)[0]
            name_len = data[off+6]
            ftype = data[off+7]
            if rec_len < 8: break  # corrupt record: abandon this block
            if ino > 0 and name_len > 0:
                name = data[off+8:off+8+name_len].decode('utf-8',errors='replace')
                entries.append((ino, name, ftype))
            off += rec_len
    return entries
# ── Phase 1: scan all inode tables ───────────────────────────────────────────
print('Phase 1: Scanning inode tables...')
with open(DEVICE, 'rb', buffering=0) as f:
    for group in range(MIN_GROUP, NUM_GROUPS):
        # Inode tables sit at a fixed stride on this filesystem:
        # 1070 + group*512 blocks (empirically determined — TODO confirm
        # against the GDT if the layout ever changes).
        it_block = 1070 + group * 512
        try:
            f.seek(it_block * BSIZE)
            inode_table = f.read(IPG * INODE_SZ)
        except OSError:
            continue
        for idx in range(IPG):
            inode_data = inode_table[idx*INODE_SZ:(idx+1)*INODE_SZ]
            if not any(inode_data): continue  # unused inode slot
            mode = struct.unpack_from('<H', inode_data, 0)[0]
            links = struct.unpack_from('<H', inode_data, 26)[0]
            # Keep only plausible directories (S_IFDIR, >= 2 links).
            if (mode & 0xf000) != 0x4000: continue
            if links < 2: continue
            inode_num = group * IPG + idx + 1
            dir_inodes.add(inode_num)
            # Read directory entries to find parent and children
            entries = read_dirents(f, inode_data)
            for ino, name, ftype in entries:
                if name == '..':
                    parent_of[inode_num] = ino
                elif name != '.':
                    children[inode_num].append((ino, name, ftype))
                    inode_name[ino] = name
        if group % 2000 == 0:
            print(f' Group {group}/{NUM_GROUPS}: '
                  f'{len(dir_inodes)} dirs found...', flush=True)
print(f'Found {len(dir_inodes)} directory inodes')
print(f'Found {len(parent_of)} directories with known parents')
# ── Phase 2: find orphan roots ────────────────────────────────────────────────
print('\nPhase 2: Finding orphan roots...')
def resolve_path(inode, depth=0, visited=None):
    """Build a path for *inode* by following '..' pointers upward.

    Chains that hit the zeroed groups or leave the scanned set root
    the subtree under lost+found/.  Returns None on cycles or chains
    deeper than 50 levels.
    """
    if visited is None: visited = set()
    if inode in visited: return None
    visited.add(inode)
    if depth > 50: return None
    parent = parent_of.get(inode)
    if parent is None:
        return f'lost+found/unknown_{inode}'
    grp = (parent-1) // IPG
    if grp < MIN_GROUP:
        # Parent is in zeroed region - this is an orphan root
        name = inode_name.get(inode, f'inode_{inode}')
        return f'lost+found/{name}_{inode}'
    if parent not in dir_inodes:
        # Parent not found in our scan
        name = inode_name.get(inode, f'inode_{inode}')
        return f'lost+found/{name}_{inode}'
    parent_path = resolve_path(parent, depth+1, visited)
    if parent_path is None:
        return f'lost+found/inode_{inode}'
    name = inode_name.get(inode, f'inode_{inode}')
    return os.path.join(parent_path, name)
# Resolve paths for all directories
print('Resolving paths...')
resolved = {}
for ino in dir_inodes:
    resolved[ino] = resolve_path(ino)
# Summary
# A top-level lost+found entry has exactly one path separator.
in_lf = sum(1 for p in resolved.values() if p and p.startswith('lost+found/')
            and p.count('/') == 1)
deep = sum(1 for p in resolved.values() if p and not p.startswith('lost+found'))
print(f'Orphan roots in lost+found: {in_lf}')
print(f'Dirs with resolved paths: {deep}')
# Show interesting paths
print('\nInteresting resolved paths:')
for ino, path in sorted(resolved.items(), key=lambda x: x[1] or ''):
    if path and any(x in path for x in ['pterodactyl','docker','mysql',
                                        'www','nginx','var','log']):
        print(f' inode {ino:10d}: {path}')
# Save tree
with open('/tmp/resolved_tree.txt','w') as f:
    for ino, path in sorted(resolved.items(), key=lambda x: x[1] or ''):
        f.write(f'{ino}\t{path or "unknown"}\n')
print(f'\nSaved {len(resolved)} paths to /tmp/resolved_tree.txt')
# ── Phase 3: extract ──────────────────────────────────────────────────────────
print('\nPhase 3: Extracting...')
os.makedirs(OUTDIR, exist_ok=True)
stats = {'dirs':0, 'files_ok':0, 'files_err':0, 'bytes':0}
# Create all directories first
# (sorted shortest-path-first so parents exist before their children)
for ino, path in sorted(resolved.items(), key=lambda x: len(x[1] or '')):
    if not path: continue
    abs_path = os.path.join(OUTDIR, path)
    os.makedirs(abs_path, exist_ok=True)
    stats['dirs'] += 1
# Extract files in each directory
for dir_ino, path in resolved.items():
    if not path: continue
    abs_dir = os.path.join(OUTDIR, path)
    for child_ino, name, ftype in children.get(dir_ino, []):
        # Skip if it's a directory (already created)
        if child_ino in dir_inodes: continue
        outpath = os.path.join(abs_dir, name)
        if ftype == 1: # regular file
            try:
                with open(outpath, 'wb') as out:
                    subprocess.run(
                        ['icat', DEVICE, str(child_ino)],
                        stdout=out, stderr=subprocess.DEVNULL,
                        timeout=300
                    )
                size = os.path.getsize(outpath)
                stats['files_ok'] += 1
                stats['bytes'] += size
                if stats['files_ok'] % 500 == 0:
                    print(f' {stats["files_ok"]} files, '
                          f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)
            except Exception as e:
                stats['files_err'] += 1
        elif ftype == 7: # symlink
            # icat returns the symlink target as the file content.
            try:
                r = subprocess.run(
                    ['icat', DEVICE, str(child_ino)],
                    capture_output=True, timeout=10
                )
                target = r.stdout.decode('utf-8',errors='replace').strip()
                if target:
                    if os.path.lexists(outpath): os.remove(outpath)
                    os.symlink(target, outpath)
                stats['files_ok'] += 1
            except:
                stats['files_err'] += 1
print()
print('=== COMPLETE ===')
print(f'Directories: {stats["dirs"]}')
print(f'Files OK: {stats["files_ok"]}')
print(f'Files ERR: {stats["files_err"]}')
print(f'Total data: {stats["bytes"]/1024**3:.2f}GB')
print(f'Output: {OUTDIR}')
EOF
python3 /tmp/reconstruct_tree.py 2>&1 | tee /tmp/reconstruct.log

131
test/restore_meta.py Normal file
View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Restore ext4 metadata (permissions, ownership, timestamps) to a recovered tree.
Run after dump_tree.py has extracted files.
Usage: python3 restore_meta.py <inode> <dest_dir>
"""
import struct, os, sys, stat, ctypes, ctypes.util
# Same geometry as dump_tree.py: read through the dm overlay and use
# the backup superblock at block 32768.
DEV = '/dev/dm-0'
BLOCK = 4096
BACKUP_SB_BLOCK = 32768
# ── reuse same low-level helpers from dump_tree.py ───────────────────────────
# (paste parse_superblock, parse_gdt_entry, read_at, read_inode,
# read_extent_tree_blocks, read_dir_entries here)
# or factor them into a shared ext4lib.py and import from both scripts
import ext4lib
libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)

class Timeval(ctypes.Structure):
    # Mirrors C `struct timeval` (seconds + microseconds).
    _fields_ = [('tv_sec', ctypes.c_long), ('tv_usec', ctypes.c_long)]

def lutimes(path, atime, mtime):
    """Set access/modification times without following symlinks.

    Thin ctypes wrapper over lutimes(3); errors from the C call are
    deliberately ignored (restoration is best-effort).
    """
    pair = (Timeval * 2)()
    pair[0].tv_sec = atime
    pair[1].tv_sec = mtime
    libc.lutimes(path.encode(), ctypes.byref(pair))
def get_inode_meta(idata, slot, sb):
    """Extract (mode_bits, uid, gid, atime, mtime) from a raw ext4 inode.

    *idata* is the block containing the inode, *slot* its byte offset,
    *sb* the parsed superblock dict (only 'inode_size' is consulted).
    The returned mode is permission bits only (stat.S_IMODE).
    """
    mode = struct.unpack_from('<H', idata, slot + 0)[0]
    uid = struct.unpack_from('<H', idata, slot + 2)[0]
    gid = struct.unpack_from('<H', idata, slot + 24)[0]
    atime = struct.unpack_from('<I', idata, slot + 8)[0]
    mtime = struct.unpack_from('<I', idata, slot + 16)[0]
    if sb['inode_size'] >= 256:
        # Large inodes carry extra time fields whose low 2 bits extend
        # the epoch.  Per the ext4 layout: i_ctime_extra @132,
        # i_mtime_extra @136, i_atime_extra @140.  (Fixed: previously
        # read 132/140, i.e. the ctime/atime extras, for atime/mtime.)
        atime_extra = struct.unpack_from('<I', idata, slot + 140)[0]
        mtime_extra = struct.unpack_from('<I', idata, slot + 136)[0]
        atime |= (atime_extra & 0x3) << 32
        mtime |= (mtime_extra & 0x3) << 32
        # 32-bit uid/gid high halves live in the linux2 osd2 union.
        uid_hi, gid_hi = struct.unpack_from('<HH', idata, slot + 120)
        uid |= uid_hi << 16
        gid |= gid_hi << 16
    return stat.S_IMODE(mode), uid, gid, atime, mtime
def restore_meta(f, sb, gdt_data, inum, dest_path):
    """Apply the on-disk inode's owner/mode/times to *dest_path*.

    Every step is best-effort: a recovered path may be missing, or the
    target filesystem may reject chown — neither should stop the walk.
    """
    try:
        idata, slot = ext4lib.read_inode(f, sb, gdt_data, inum)
        mode, uid, gid, atime, mtime = get_inode_meta(idata, slot, sb)
        is_symlink = os.path.islink(dest_path)
        try:
            os.lchown(dest_path, uid, gid)
        except OSError:
            pass
        if not is_symlink:
            # chmod on a symlink would affect its target; skip it.
            try:
                os.chmod(dest_path, mode)
            except OSError:
                pass
        try:
            lutimes(dest_path, atime, mtime)
        except Exception:
            pass
    except Exception as e:
        print(f" WARN {dest_path}: {e}", file=sys.stderr)
def walk_and_restore(f, sb, gdt_data, inum, dest_dir, visited=None):
    """Depth-first walk mirroring dump_tree, restoring metadata on every
    path that was actually recovered.

    Directory timestamps are re-applied after the children are
    processed, because touching children bumps the parent's times.
    """
    if visited is None:
        visited = set()
    if inum in visited:
        return
    visited.add(inum)
    # Restore the directory itself
    restore_meta(f, sb, gdt_data, inum, dest_dir)
    try:
        entries = ext4lib.read_dir_entries(f, sb, gdt_data, inum)
    except Exception:
        return
    for name, (child_inum, ftype) in entries.items():
        if name in ('.', '..'):
            continue
        # Must match the sanitisation used by the extractor.
        safe_name = name.replace('/', '_').replace('\x00', '')
        dest = os.path.join(dest_dir, safe_name)
        if not os.path.lexists(dest):
            # File wasn't recovered - skip
            continue
        if os.path.isdir(dest) and not os.path.islink(dest):
            walk_and_restore(f, sb, gdt_data, child_inum, dest, visited)
        else:
            restore_meta(f, sb, gdt_data, child_inum, dest)
    # Restore directory timestamps AFTER processing children
    # (writing children updates parent dir mtime/atime)
    restore_meta(f, sb, gdt_data, inum, dest_dir)
def main():
    """CLI entry point: usage <inode> <dest_dir>."""
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} <inode> <dest_dir>")
        sys.exit(1)
    root_inum = int(sys.argv[1])
    dest_dir = sys.argv[2]
    with open(DEV, 'rb') as f:
        # Parse the backup superblock and its GDT copy, as in dump_tree.
        sb_data = ext4lib.read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = ext4lib.parse_superblock(sb_data)
        assert sb['magic'] == 0xef53
        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        gdt_data = ext4lib.read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                                   num_groups * sb['desc_size'])
        print(f"Restoring metadata: inode {root_inum} -> {dest_dir}")
        walk_and_restore(f, sb, gdt_data, root_inum, dest_dir)
    print("Done")
if __name__ == '__main__':
    main()

154
test/scan.py Normal file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
Strict ext4 directory entry scanner for pterodactyl paths.
"""
import struct
CHUNK = 128 * 512
LV_START = 5120000 * 512
BSIZE = 4096
DISKS = ['/dev/sda', '/dev/sdd', '/dev/sdc', '/dev/sdb']
# Only exact target names we expect as directory entries
EXACT_TARGETS = [
b'pterodactyl',
b'volumes',
b'wings',
]
def is_valid_dirent(block, off, name):
    """Strict validation of an ext4 directory entry."""
    want_len = len(name)
    if off + 8 + want_len > BSIZE:
        return False
    inode, rec_len, name_len, ftype = struct.unpack_from('<IHBB', block, off)
    # inode must be plausible (> 10, not absurdly large)
    if inode <= 10 or inode >= 500_000_000:
        return False
    # name_len must exactly match our target
    if name_len != want_len:
        return False
    # rec_len: at least header+name, at most one block, 4-byte aligned
    if rec_len < 8 + name_len or rec_len > BSIZE or rec_len % 4:
        return False
    # only regular file, directory, symlink
    if ftype not in (1, 2, 7):
        return False
    # the name bytes must match exactly
    if block[off + 8:off + 8 + name_len] != name:
        return False
    # first padding byte after the name should be NUL
    pad_off = off + 8 + name_len
    return not (pad_off < BSIZE and block[pad_off] != 0)
def scan_block(block, phys_base):
    """Scan one 4 KiB block for valid dirents naming any EXACT_TARGET."""
    found = []
    type_names = {1: 'file', 2: 'dir', 7: 'symlink'}
    for off in range(BSIZE - 8):
        for target in EXACT_TARGETS:
            # Cheap byte-compare before the full structural check.
            if block[off + 8:off + 8 + len(target)] != target:
                continue
            if not is_valid_dirent(block, off, target):
                continue
            inode, rec_len = struct.unpack_from('<IH', block, off)
            grp = (inode - 1) // 8192
            found.append({
                'phys': phys_base + off,
                'inode': inode,
                'name': target.decode(),
                'ftype': type_names.get(block[off + 7], '?'),
                'group': grp,
                'intact': grp >= 13,
                'rec_len': rec_len,
            })
    return found
def iter_data_chunks(disk_path):
    """Yield (physical_offset, data) for every data chunk on one disk.

    Every fifth chunk (index 4 of each 5-chunk cycle) is skipped; the
    pre-existing callers treat it as non-data. Iteration stops at the last
    whole chunk that fits on the device.

    Fix: the original opened the device twice (once just to learn its size);
    a single handle is sufficient and avoids the redundant open.
    """
    with open(disk_path, 'rb') as f:
        # Device size via seek-to-end on the same handle.
        f.seek(0, 2)
        disk_size = f.tell()
        phys = LV_START
        chunk_num = 0
        while phys + CHUNK <= disk_size:
            if chunk_num % 5 != 4:  # skip the non-data chunk
                f.seek(phys)
                yield phys, f.read(CHUNK)
            phys += CHUNK
            chunk_num += 1
def main():
    """Scan all member disks for target dirents and summarize hits by inode."""
    all_hits = []
    for disk_idx, disk in enumerate(DISKS):
        print(f'\nScanning {disk}...', flush=True)
        chunks = 0
        hits = 0
        for phys, chunk_data in iter_data_chunks(disk):
            # Pre-filter: any target in chunk?
            if not any(t in chunk_data for t in EXACT_TARGETS):
                chunks += 1
                continue
            # Scan each 4KB block in chunk
            for blk in range(0, len(chunk_data), BSIZE):
                block = chunk_data[blk:blk+BSIZE]
                for hit in scan_block(block, phys + blk):
                    status = 'INTACT' if hit['intact'] else 'LOST'
                    print(f" [{status}] '{hit['name']}' "
                          f"inode={hit['inode']} "
                          f"group={hit['group']} "
                          f"type={hit['ftype']} "
                          f"phys={hit['phys']}")
                    all_hits.append((disk_idx, hit))
                    hits += 1
            chunks += 1
            # Periodic progress line (overwritten results stay on stdout).
            if chunks % 5000 == 0:
                gb = (phys - LV_START) / 1024**3
                print(f' {disk}: {gb:.1f}GB, {hits} hits', flush=True)
        print(f' Finished: {hits} hits')
    print('\n=== RESULTS ===')
    # Group by name and inode
    from collections import defaultdict
    by_inode = defaultdict(list)
    for disk_idx, hit in all_hits:
        key = (hit['inode'], hit['name'])
        by_inode[key].append((DISKS[disk_idx], hit['phys']))
    print(f'\nUnique (inode, name) pairs: {len(by_inode)}')
    for (inode, name), locations in sorted(by_inode.items()):
        grp = (inode-1)//8192
        status = 'INTACT' if grp >= 13 else 'LOST'
        print(f" '{name}' inode={inode} group={grp} [{status}]")
        # Show at most three physical locations per (inode, name) pair.
        for disk, phys in locations[:3]:
            print(f" {disk} phys={phys}")


if __name__ == '__main__':
    main()

63
test/scan1.sh Normal file
View File

@@ -0,0 +1,63 @@
# debugfs can search for directory entries by name
# even without a valid path
debugfs -c /dev/nbd0 << 'EOF'
stat <1585918>
EOF
# Use the parent inode from stat to walk upward
# inode 1585918 is 'volumes' - its .. entry points to pterodactyl dir
# pterodactyl's .. points to lib
# lib's .. points to var
# Find parent chain by reading .. entries
# NOTE(review): the embedded script shells out to debugfs once per inode and
# scrapes its text output; the bare `except: pass` inside swallows int() noise.
python3 -c "
import subprocess
def get_parent(inode):
    r = subprocess.run(
        ['debugfs', '-c', '-R', f'ls -l <{inode}>', '/dev/nbd0'],
        capture_output=True, text=True
    )
    for line in r.stdout.splitlines():
        if '..' in line:
            parts = line.split()
            for p in parts:
                try:
                    n = int(p)
                    if n > 0 and n != inode:
                        return n
                except: pass
    return None
def get_name(inode):
    r = subprocess.run(
        ['debugfs', '-c', '-R', f'stat <{inode}>', '/dev/nbd0'],
        capture_output=True, text=True
    )
    return r.stdout[:200]
# Walk up from volumes inode
inode = 1585918
chain = [inode]
print(f'Walking up from inode {inode} (volumes)...')
for i in range(10):
    parent = get_parent(inode)
    if parent is None or parent == inode or parent in chain:
        break
    print(f' inode {inode} -> parent {parent}')
    chain.append(parent)
    inode = parent
print(f'Chain: {chain}')
print()
# Now list each parent to find siblings of pterodactyl/volumes
for ino in chain[1:]:
    print(f'Contents of inode {ino}:')
    r = subprocess.run(
        ['debugfs', '-c', '-R', f'ls <{ino}>', '/dev/nbd0'],
        capture_output=True, text=True
    )
    print(r.stdout[:500])
    print()
"

242
test/scan_inodes.py Normal file
View File

@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
Scan ext4 filesystem for orphaned directory roots.
Reads inode table directly using geometry from backup superblock.
"""
import struct, sys
from collections import defaultdict
DEV = '/dev/dm-0'          # device-mapper node holding the filesystem
BLOCK = 4096               # ext4 block size
BACKUP_SB_BLOCK = 32768    # backup superblock at the start of group 1
def read_at(f, offset, size):
    """Return *size* bytes read from *f* starting at absolute byte *offset*."""
    f.seek(offset, 0)
    chunk = f.read(size)
    return chunk
def parse_superblock(data):
    """Decode the ext4 superblock fields this tool needs into a dict."""
    layout = {
        'inodes_count': ('<I', 0),
        'blocks_count': ('<I', 4),
        'blocks_per_group': ('<I', 32),
        'inodes_per_group': ('<I', 40),
        'inode_size': ('<H', 88),
        'magic': ('<H', 56),
        'feature_incompat': ('<I', 96),
    }
    sb = {field: struct.unpack_from(fmt, data, off)[0]
          for field, (fmt, off) in layout.items()}
    # s_desc_size of 0 means pre-64bit 32-byte group descriptors.
    sb['desc_size'] = struct.unpack_from('<H', data, 254)[0] or 32
    return sb
def parse_gdt_entry(data, offset, desc_size):
    """Return the inode-table block from one group descriptor.

    64-byte descriptors (64-bit feature) carry the high half at +40.
    """
    table_lo = struct.unpack_from('<I', data, offset + 8)[0]
    if desc_size < 64:
        return table_lo
    table_hi = struct.unpack_from('<I', data, offset + 40)[0]
    return (table_hi << 32) | table_lo
def parse_extent_tree(data, inode_offset):
    """Parse the inline (depth-0) extent list of an inode.

    Returns [(logical_block, physical_block, length), ...]; empty when the
    extent magic is missing or the tree has interior nodes.
    """
    hdr = inode_offset + 40
    magic, n_entries, _max, depth = struct.unpack_from('<HHHH', data, hdr)
    if magic != 0xF30A or depth != 0:
        return []
    extents = []
    # At most 4 extents fit in the 60-byte inline i_block area.
    for i in range(min(n_entries, 4)):
        pos = hdr + 12 + i * 12
        if pos + 12 > len(data):
            break
        # Layout: ee_block(4) + ee_len(2) + ee_start_hi(2) + ee_start_lo(4)
        l_block, ee_len, hi, lo = struct.unpack_from('<IHHI', data, pos)
        phys = (hi << 32) | lo
        if phys > 0:
            # High bit of ee_len marks an unwritten extent; mask it off.
            extents.append((l_block, phys, ee_len & 0x7FFF))
    return extents
def read_dir_entries(f, inode_data, inode_offset):
    """Parse the first directory data block of an inode into {name: (ino, ftype)}."""
    entries = {}
    extents = parse_extent_tree(inode_data, inode_offset)
    if not extents:
        return entries
    # The first extent is enough: it always holds '.' and '..'.
    _l_block, phys_block, _length = extents[0]
    try:
        data = read_at(f, phys_block * BLOCK, BLOCK)
    except OSError:
        return entries
    offset = 0
    while offset < BLOCK - 8:
        ino, rec_len, name_len, ftype = struct.unpack_from(
            '<IHBB', data, offset)
        # Bail out on corrupt record lengths to avoid an infinite loop.
        if rec_len < 8 or offset + rec_len > BLOCK:
            break
        if ino and name_len:
            name = data[offset + 8:offset + 8 + name_len].decode(
                'utf-8', errors='replace')
            entries[name] = (ino, ftype)
        offset += rec_len
    return entries
def parse_inode(data, offset):
    """Decode the handful of on-disk inode fields the scanner uses.

    Returns None when fewer than 128 bytes are available at *offset*.
    """
    if len(data) - offset < 128:
        return None
    mode, _uid, _size_lo = struct.unpack_from('<HHI', data, offset)
    _atime, ctime, mtime, _dtime = struct.unpack_from('<IIII', data, offset + 8)
    links = struct.unpack_from('<H', data, offset + 26)[0]
    # i_block starts at +40; the first word doubles as the extent magic.
    first_block = struct.unpack_from('<I', data, offset + 40)[0]
    return {
        'mode': mode,
        'type': mode & 0xF000,
        'links': links,
        'ctime': ctime,
        'mtime': mtime,
        'block0': first_block,
    }
def main():
    """Scan groups >= 13 for directory inodes and report detached tree roots."""
    with open(DEV, 'rb') as f:
        # Read backup superblock (no +1024 offset for backup blocks)
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = parse_superblock(sb_data)
        # After parsing superblock, check feature flags
        INCOMPAT_EXTENTS = 0x40
        uses_extents = sb['feature_incompat'] & INCOMPAT_EXTENTS
        print(f"Extent trees: {'yes' if uses_extents else 'no'}")
        assert sb['magic'] == 0xef53, f"Bad SB magic: {sb['magic']:#x}"
        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{sb['inodes_per_group']} ino/grp, "
              f"inode_size={sb['inode_size']}, "
              f"desc_size={sb['desc_size']}")
        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        print(f"Total groups: {num_groups}, scanning from group 13+")
        # Read GDT from backup location (block after backup SB)
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * sb['desc_size'])
        # Map: inode_num -> (parent_inode, group, name)
        # We collect (dot_inode, dotdot_inode) for every dir we find
        dir_parents = {}  # inode -> parent_inode
        all_dirs = set()
        for grp in range(13, num_groups):
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * sb['desc_size'], sb['desc_size'])
            if inode_table_block == 0:
                continue
            inodes_per_block = BLOCK // sb['inode_size']
            num_inode_blocks = (sb['inodes_per_group'] * sb['inode_size']
                                + BLOCK - 1) // BLOCK
            for blk_off in range(num_inode_blocks):
                try:
                    idata = read_at(f,
                                    (inode_table_block + blk_off) * BLOCK, BLOCK)
                except OSError:
                    continue
                for slot in range(inodes_per_block):
                    ino_off = slot * sb['inode_size']
                    ino = parse_inode(idata, ino_off)
                    if ino is None:
                        continue
                    if ino['type'] != 0x4000:  # S_IFDIR
                        continue
                    if ino['links'] == 0:
                        continue
                    abs_inum = (grp * sb['inodes_per_group']
                                + blk_off * inodes_per_block
                                + slot + 1)
                    all_dirs.add(abs_inum)
                    # Debug: for the first 5 dirs found, dump raw extent header
                    if len(all_dirs) <= 5:
                        base = ino_off + 40
                        raw = idata[base:base+24]
                        magic, entries_cnt, max_e, depth = struct.unpack_from('<HHHH', raw, 0)
                        print(f"\nDEBUG inode {abs_inum} grp={grp}:")
                        print(f"  raw bytes: {raw.hex()}")
                        print(f"  extent header: magic={magic:#06x} entries={entries_cnt} depth={depth}")
                        if len(raw) >= 24:
                            # NOTE(review): '<IIHH' here disagrees with the
                            # '<IHHI' layout used by parse_extent_tree; this
                            # is debug output only - confirm which is intended.
                            l_block, len_blks, start_hi, start_lo = struct.unpack_from('<IIHH', raw, 12)
                            phys = (start_hi << 32) | start_lo
                            print(f"  first extent: l_block={l_block} phys={phys} len={len_blks}")
                            # Try reading what's at that block
                            if phys > 0:
                                try:
                                    ddata = read_at(f, phys * BLOCK, 32)
                                    print(f"  block {phys} first 32 bytes: {ddata.hex()}")
                                    # Check if it looks like a dir entry
                                    ino2, rec2, nlen2, ft2 = struct.unpack_from('<IHBB', ddata, 0)
                                    print(f"  as dir entry: inode={ino2} rec_len={rec2} name_len={nlen2}")
                                except OSError as e:
                                    print(f"  read error: {e}")
                    entries = read_dir_entries(f, idata, ino_off)
                    dot = entries.get('.', (None,))[0]
                    dotdot = entries.get('..', (None,))[0]
                    # Only trust dirs whose '.' points back at themselves.
                    if dot == abs_inum and dotdot is not None:
                        dir_parents[abs_inum] = dotdot
            if grp % 100 == 0:
                print(f"  scanned group {grp}/{num_groups}, "
                      f"dirs so far: {len(all_dirs)}",
                      end='\r', flush=True)
        print(f"\nTotal dirs found: {len(all_dirs)}")
        print(f"Dirs with readable . and ..: {len(dir_parents)}")
    FIRST_GOOD_INODE = 13 * 8192  # first inode in group 13
    orphan_roots = []
    for inum, parent in dir_parents.items():
        if parent == inum:
            orphan_roots.append((inum, parent, 'self-referential'))
        elif parent < FIRST_GOOD_INODE:
            # parent is in zeroed region - this is a detached root
            orphan_roots.append((inum, parent, 'parent-in-zeroed-region'))
        elif parent not in all_dirs:
            orphan_roots.append((inum, parent, 'parent-missing'))
    # Build set of all orphaned inodes
    orphan_inums = {inum for inum, parent, reason in orphan_roots}
    # True roots: orphans whose parent is not itself an orphan
    true_roots = [(inum, parent, reason)
                  for inum, parent, reason in orphan_roots
                  if parent not in orphan_inums]
    print(f"\nOrphaned roots: {len(true_roots)}")
    print(f"{'inode':>12} {'parent':>12} {'status':>12} {'dtime':>12} reason")
    print('-' * 75)
    with open(DEV, 'rb') as f:
        for inum, parent, reason in sorted(true_roots):
            try:
                # NOTE(review): read_inode / classify_inode are not defined
                # anywhere in this file, so this raises NameError every
                # iteration and the broad except below reports 'unreadable'.
                # Confirm the intended helpers (cf. the .bak variant's
                # inline geometry code).
                idata, slot = read_inode(f, sb, gdt_data, inum)
                status = classify_inode(idata, slot)
                dtime = struct.unpack_from('<I', idata, slot + 20)[0]
                # Format dtime as human readable if set
                if dtime:
                    import datetime
                    dt = datetime.datetime.fromtimestamp(dtime).strftime('%Y-%m-%d %H:%M:%S')
                else:
                    dt = 'never'
            except Exception:
                status, dt = 'unreadable', 'unknown'
            print(f"{inum:>12} {parent:>12} {status:>12} {dt:>19} {reason}")


if __name__ == '__main__':
    main()

246
test/scan_inodes.py.bak Normal file
View File

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Scan ext4 filesystem for orphaned directory roots.
Reads inode table directly using geometry from backup superblock.
"""
import struct, sys
from collections import defaultdict
DEV = '/dev/dm-0'          # device-mapper node holding the filesystem
BLOCK = 4096               # ext4 block size
BACKUP_SB_BLOCK = 32768    # backup superblock at the start of group 1
def read_at(f, offset, size):
    """Return *size* bytes read from *f* starting at absolute byte *offset*."""
    f.seek(offset, 0)
    chunk = f.read(size)
    return chunk
def parse_superblock(data):
    """Decode the ext4 superblock fields this tool needs into a dict."""
    layout = {
        'inodes_count': ('<I', 0),
        'blocks_count': ('<I', 4),
        'blocks_per_group': ('<I', 32),
        'inodes_per_group': ('<I', 40),
        'inode_size': ('<H', 88),
        'magic': ('<H', 56),
        'feature_incompat': ('<I', 96),
    }
    sb = {field: struct.unpack_from(fmt, data, off)[0]
          for field, (fmt, off) in layout.items()}
    # s_desc_size of 0 means pre-64bit 32-byte group descriptors.
    sb['desc_size'] = struct.unpack_from('<H', data, 254)[0] or 32
    return sb
def parse_gdt_entry(data, offset, desc_size):
    """Return the inode-table block from one group descriptor.

    64-byte descriptors (64-bit feature) carry the high half at +40.
    """
    table_lo = struct.unpack_from('<I', data, offset + 8)[0]
    if desc_size < 64:
        return table_lo
    table_hi = struct.unpack_from('<I', data, offset + 40)[0]
    return (table_hi << 32) | table_lo
def parse_extent_tree(data, inode_offset):
    """Parse the inline (depth-0) extent list of an inode.

    Returns [(logical_block, physical_block, length), ...]; empty when the
    extent magic is missing or the tree has interior nodes.
    """
    hdr = inode_offset + 40
    magic, n_entries, _max, depth = struct.unpack_from('<HHHH', data, hdr)
    if magic != 0xF30A or depth != 0:
        return []
    extents = []
    # At most 4 extents fit in the 60-byte inline i_block area.
    for i in range(min(n_entries, 4)):
        pos = hdr + 12 + i * 12
        if pos + 12 > len(data):
            break
        # Layout: ee_block(4) + ee_len(2) + ee_start_hi(2) + ee_start_lo(4)
        l_block, ee_len, hi, lo = struct.unpack_from('<IHHI', data, pos)
        phys = (hi << 32) | lo
        if phys > 0:
            # High bit of ee_len marks an unwritten extent; mask it off.
            extents.append((l_block, phys, ee_len & 0x7FFF))
    return extents
def read_dir_entries(f, inode_data, inode_offset):
    """Parse the first directory data block of an inode into {name: (ino, ftype)}."""
    entries = {}
    extents = parse_extent_tree(inode_data, inode_offset)
    if not extents:
        return entries
    # The first extent is enough: it always holds '.' and '..'.
    _l_block, phys_block, _length = extents[0]
    try:
        data = read_at(f, phys_block * BLOCK, BLOCK)
    except OSError:
        return entries
    offset = 0
    while offset < BLOCK - 8:
        ino, rec_len, name_len, ftype = struct.unpack_from(
            '<IHBB', data, offset)
        # Bail out on corrupt record lengths to avoid an infinite loop.
        if rec_len < 8 or offset + rec_len > BLOCK:
            break
        if ino and name_len:
            name = data[offset + 8:offset + 8 + name_len].decode(
                'utf-8', errors='replace')
            entries[name] = (ino, ftype)
        offset += rec_len
    return entries
def parse_inode(data, offset):
    """Decode the handful of on-disk inode fields the scanner uses.

    Returns None when fewer than 128 bytes are available at *offset*.
    """
    if len(data) - offset < 128:
        return None
    mode, _uid, _size_lo = struct.unpack_from('<HHI', data, offset)
    _atime, ctime, mtime, _dtime = struct.unpack_from('<IIII', data, offset + 8)
    links = struct.unpack_from('<H', data, offset + 26)[0]
    # i_block starts at +40; the first word doubles as the extent magic.
    first_block = struct.unpack_from('<I', data, offset + 40)[0]
    return {
        'mode': mode,
        'type': mode & 0xF000,
        'links': links,
        'ctime': ctime,
        'mtime': mtime,
        'block0': first_block,
    }
def main():
    """Scan groups >= 13 for directory inodes; list orphaned and true roots."""
    with open(DEV, 'rb') as f:
        # Read backup superblock (no +1024 offset for backup blocks)
        sb_data = read_at(f, BACKUP_SB_BLOCK * BLOCK, 1024)
        sb = parse_superblock(sb_data)
        # After parsing superblock, check feature flags
        INCOMPAT_EXTENTS = 0x40
        uses_extents = sb['feature_incompat'] & INCOMPAT_EXTENTS
        print(f"Extent trees: {'yes' if uses_extents else 'no'}")
        assert sb['magic'] == 0xef53, f"Bad SB magic: {sb['magic']:#x}"
        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{sb['inodes_per_group']} ino/grp, "
              f"inode_size={sb['inode_size']}, "
              f"desc_size={sb['desc_size']}")
        num_groups = (sb['blocks_count'] + sb['blocks_per_group'] - 1) \
            // sb['blocks_per_group']
        print(f"Total groups: {num_groups}, scanning from group 13+")
        # Read GDT from backup location (block after backup SB)
        gdt_data = read_at(f, (BACKUP_SB_BLOCK + 1) * BLOCK,
                           num_groups * sb['desc_size'])
        # Map: inode_num -> (parent_inode, group, name)
        # We collect (dot_inode, dotdot_inode) for every dir we find
        dir_parents = {}  # inode -> parent_inode
        all_dirs = set()
        for grp in range(13, num_groups):
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * sb['desc_size'], sb['desc_size'])
            if inode_table_block == 0:
                continue
            inodes_per_block = BLOCK // sb['inode_size']
            num_inode_blocks = (sb['inodes_per_group'] * sb['inode_size']
                                + BLOCK - 1) // BLOCK
            for blk_off in range(num_inode_blocks):
                try:
                    idata = read_at(f,
                                    (inode_table_block + blk_off) * BLOCK, BLOCK)
                except OSError:
                    continue
                for slot in range(inodes_per_block):
                    ino_off = slot * sb['inode_size']
                    ino = parse_inode(idata, ino_off)
                    if ino is None:
                        continue
                    if ino['type'] != 0x4000:  # S_IFDIR
                        continue
                    if ino['links'] == 0:
                        continue
                    abs_inum = (grp * sb['inodes_per_group']
                                + blk_off * inodes_per_block
                                + slot + 1)
                    all_dirs.add(abs_inum)
                    # Debug: for the first 5 dirs found, dump raw extent header
                    if len(all_dirs) <= 5:
                        base = ino_off + 40
                        raw = idata[base:base+24]
                        magic, entries_cnt, max_e, depth = struct.unpack_from('<HHHH', raw, 0)
                        print(f"\nDEBUG inode {abs_inum} grp={grp}:")
                        print(f"  raw bytes: {raw.hex()}")
                        print(f"  extent header: magic={magic:#06x} entries={entries_cnt} depth={depth}")
                        if len(raw) >= 24:
                            # NOTE(review): '<IIHH' disagrees with the '<IHHI'
                            # layout used by parse_extent_tree; debug only.
                            l_block, len_blks, start_hi, start_lo = struct.unpack_from('<IIHH', raw, 12)
                            phys = (start_hi << 32) | start_lo
                            print(f"  first extent: l_block={l_block} phys={phys} len={len_blks}")
                            # Try reading what's at that block
                            if phys > 0:
                                try:
                                    ddata = read_at(f, phys * BLOCK, 32)
                                    print(f"  block {phys} first 32 bytes: {ddata.hex()}")
                                    # Check if it looks like a dir entry
                                    ino2, rec2, nlen2, ft2 = struct.unpack_from('<IHBB', ddata, 0)
                                    print(f"  as dir entry: inode={ino2} rec_len={rec2} name_len={nlen2}")
                                except OSError as e:
                                    print(f"  read error: {e}")
                    entries = read_dir_entries(f, idata, ino_off)
                    dot = entries.get('.', (None,))[0]
                    dotdot = entries.get('..', (None,))[0]
                    # Only trust dirs whose '.' points back at themselves.
                    if dot == abs_inum and dotdot is not None:
                        dir_parents[abs_inum] = dotdot
            if grp % 100 == 0:
                print(f"  scanned group {grp}/{num_groups}, "
                      f"dirs so far: {len(all_dirs)}",
                      end='\r', flush=True)
        print(f"\nTotal dirs found: {len(all_dirs)}")
        print(f"Dirs with readable . and ..: {len(dir_parents)}")
    FIRST_GOOD_INODE = 13 * 8192  # first inode in group 13
    orphan_roots = []
    for inum, parent in dir_parents.items():
        if parent == inum:
            orphan_roots.append((inum, parent, 'self-referential'))
        elif parent < FIRST_GOOD_INODE:
            # parent is in zeroed region - this is a detached root
            orphan_roots.append((inum, parent, 'parent-in-zeroed-region'))
        elif parent not in all_dirs:
            orphan_roots.append((inum, parent, 'parent-missing'))
    print(f"\nOrphaned roots: {len(orphan_roots)}")
    print(f"{'inode':>12} {'parent':>12} reason")
    print('-' * 45)
    for inum, parent, reason in sorted(orphan_roots):
        print(f"{inum:>12} {parent:>12} {reason}")
    # Build set of all orphaned inodes
    orphan_inums = {inum for inum, parent, reason in orphan_roots}
    # True roots: orphans whose parent is not itself an orphan
    true_roots = [(inum, parent, reason)
                  for inum, parent, reason in orphan_roots
                  if parent not in orphan_inums]
    print(f"\nTrue detached tree roots: {len(true_roots)}")
    print(f"{'inode':>12} {'parent':>12} reason")
    print('-' * 55)
    for inum, parent, reason in sorted(true_roots):
        # Try to get first few dir entries to identify the tree
        # (reopens the device once per root - slow but simple)
        with open(DEV, 'rb') as f:
            grp = (inum - 1) // sb['inodes_per_group']
            local_idx = (inum - 1) % sb['inodes_per_group']
            inode_table_block = parse_gdt_entry(
                gdt_data, grp * sb['desc_size'], sb['desc_size'])
            blk_off = (local_idx * sb['inode_size']) // BLOCK
            slot = (local_idx * sb['inode_size']) % BLOCK
            idata = read_at(f, (inode_table_block + blk_off) * BLOCK, BLOCK)
            entries = read_dir_entries(f, idata, slot)
            # Show entries excluding . and ..
            names = [k for k in entries if k not in ('.', '..')][:5]
            print(f"{inum:>12} {parent:>12} {names}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import struct, os
DEVICE = '/dev/nbd0'   # reconstructed filesystem exposed over NBD
BSIZE = 4096           # ext4 block size
IPG = 8192             # inodes per group
INODE_SZ = 256         # on-disk inode record size
NUM_GROUPS = 35728     # total block groups
MIN_GROUP = 13         # groups 0-12 skipped (zeroed region)
# Directory-entry names to hunt for
TARGETS = [
    b'pterodactyl',
    b'mysql',
    b'www',
    b'log',
    b'docker',
    b'nginx',
    b'apache2',
    b'archives',
    b'wings',
    b'grub2-efi.cfg',
    b'commons-codec',
]
def is_valid_dirent(block, off, name):
    """Strict structural validation of an ext4 dirent naming *name* at *off*."""
    want_len = len(name)
    if off + 8 + want_len > BSIZE:
        return False
    inode, rec_len, name_len, ftype = struct.unpack_from('<IHBB', block, off)
    # Plausible inode number only.
    if inode <= 10 or inode >= 500_000_000:
        return False
    if name_len != want_len:
        return False
    # rec_len: at least header+name, at most one block, 4-byte aligned.
    if rec_len < 8 + name_len or rec_len > BSIZE or rec_len % 4:
        return False
    # Regular file, directory, or symlink only.
    if ftype not in (1, 2, 7):
        return False
    if block[off + 8:off + 8 + name_len] != name:
        return False
    # First padding byte after the name should be NUL.
    pad = off + 8 + name_len
    return not (pad < BSIZE and block[pad] != 0)
def parse_extents(inode_data):
    """Return up to 8 physical block numbers per inline (depth-0) extent."""
    magic, n_entries = struct.unpack_from('<HH', inode_data, 40)
    if magic != 0xf30a:
        return []
    if struct.unpack_from('<H', inode_data, 46)[0] != 0:
        # Interior extent nodes are not followed.
        return []
    blocks = []
    for i in range(min(n_entries, 4)):
        base = 52 + i * 12
        ee_len, ee_hi, ee_lo = struct.unpack_from('<HHI', inode_data, base + 4)
        ee_start = (ee_hi << 32) | ee_lo
        # Implausibly long extents are treated as garbage.
        if ee_len > 1024:
            continue
        blocks.extend(ee_start + b for b in range(min(ee_len, 8)))
    return blocks
# Top-level driver: walk every inode table, find directory inodes, and scan
# their data blocks for dirents matching TARGETS.
results = {}
print(f'Device: {DEVICE}')
print(f'Scanning groups {MIN_GROUP} to {NUM_GROUPS-1}...')
print()
with open(DEVICE, 'rb', buffering=0) as f:
    for group in range(MIN_GROUP, NUM_GROUPS):
        # Fixed layout assumption: inode table of group N at block 1070 + N*512
        it_block = 1070 + group * 512
        try:
            f.seek(it_block * BSIZE)
            inode_table = f.read(IPG * INODE_SZ)
        except OSError:
            continue
        for idx in range(IPG):
            inode_data = inode_table[idx*INODE_SZ:(idx+1)*INODE_SZ]
            # Skip all-zero (unused) inode slots cheaply.
            if not any(inode_data):
                continue
            mode = struct.unpack_from('<H', inode_data, 0)[0]
            links = struct.unpack_from('<H', inode_data, 26)[0]
            # Directories only (S_IFDIR), with a sane link count.
            if (mode & 0xf000) != 0x4000:
                continue
            if links < 2:
                continue
            inode_num = group * IPG + idx + 1
            blocks = parse_extents(inode_data)
            for blk in blocks:
                try:
                    f.seek(blk * BSIZE)
                    blk_data = f.read(BSIZE)
                except OSError:
                    continue
                for target in TARGETS:
                    # Fast substring pre-filter before structural checks.
                    if target not in blk_data:
                        continue
                    for off in range(0, BSIZE-8):
                        if blk_data[off+8:off+8+len(target)] != target:
                            continue
                        if is_valid_dirent(blk_data, off, target):
                            child_ino = struct.unpack_from('<I',blk_data,off)[0]
                            ftype = blk_data[off+7]
                            child_grp = (child_ino-1)//IPG
                            key = (target.decode(), child_ino)
                            if key not in results:
                                results[key] = (inode_num, ftype, child_grp)
                                status = 'INTACT' if child_grp>=13 else 'LOST'
                                tname = {1:'file',2:'dir',7:'link'}.get(ftype,'?')
                                print(f'[{status}] {target.decode()!r:15s} '
                                      f'child={child_ino:10d} '
                                      f'parent={inode_num:10d} '
                                      f'type={tname}', flush=True)
        if group % 1000 == 0:
            print(f' Group {group}/{NUM_GROUPS}...', flush=True)
print()
print('=== SUMMARY ===')
for (name, child_ino), (parent_ino, ftype, grp) in sorted(results.items()):
    status = 'INTACT' if grp >= 13 else 'LOST'
    tname = {1:'file',2:'dir',7:'link'}.get(ftype,'?')
    print(f'[{status}] {name!r:15s} child={child_ino} '
          f'parent={parent_ino} type={tname}')

View File

@@ -0,0 +1,150 @@
#!/usr/bin/env python3
"""
Scan inode tables directly for directory inodes,
then read their data blocks looking for target names.
Much faster than full disk scan.
"""
import struct, os
CHUNK = 128 * 512          # RAID chunk size: 64 KiB
LV_START = 5120000 * 512   # byte offset of the LV on each member disk
BSIZE = 4096               # ext4 block size
DISKS = ['/dev/sda', '/dev/sde', '/dev/sdd', '/dev/sdc']
IPG = 8192                 # inodes per group
INODE_SZ = 256             # on-disk inode record size
BPG = 32768                # blocks per group
NUM_GROUPS = 35728         # total block groups
MIN_GROUP = 13  # groups 0-12 are zeroed
# Directory-entry names to hunt for
TARGETS = [
    b'pterodactyl',
    b'var',
    b'mysql',
    b'www',
    b'log',
    b'docker',
    b'nginx',
    b'apache2',
    b'www-data',
]
def v_to_p(virt_byte):
    """Virtual byte offset to physical (disk 0) byte offset.

    Returns None for offsets that land in the 5th chunk of each cycle,
    which has no image on disk 0.
    """
    cycle, in_cycle = divmod(virt_byte, 5 * CHUNK)
    chunk_idx, intra = divmod(in_cycle, CHUNK)
    if chunk_idx == 4:
        return None
    return LV_START + (cycle * 4 + chunk_idx) * CHUNK + intra
def read_virt(f, virt_byte, length):
    """Read from virtual address space via disk 0; zeros for unmapped chunks."""
    phys = v_to_p(virt_byte)
    if phys is None:
        return bytes(length)
    f.seek(phys)
    return f.read(length)
def is_valid_dirent(block, off, name):
    """Strict structural validation of an ext4 dirent naming *name* at *off*."""
    want_len = len(name)
    if off + 8 + want_len > BSIZE:
        return False
    inode, rec_len, name_len, ftype = struct.unpack_from('<IHBB', block, off)
    # Plausible inode number only.
    if inode <= 10 or inode >= 500_000_000:
        return False
    if name_len != want_len:
        return False
    # rec_len: at least header+name, at most one block, 4-byte aligned.
    if rec_len < 8 + name_len or rec_len > BSIZE or rec_len % 4:
        return False
    # Regular file, directory, or symlink only.
    if ftype not in (1, 2, 7):
        return False
    if block[off + 8:off + 8 + name_len] != name:
        return False
    # First padding byte after the name should be NUL.
    pad = off + 8 + name_len
    return not (pad < BSIZE and block[pad] != 0)
def parse_extents(inode_data):
    """Get list of physical block numbers from the inline extent tree."""
    magic, n_entries = struct.unpack_from('<HH', inode_data, 40)
    if magic != 0xf30a:
        return []
    if struct.unpack_from('<H', inode_data, 46)[0] != 0:
        # Interior extent nodes are not followed.
        return []
    blocks = []
    for i in range(min(n_entries, 4)):
        base = 52 + i * 12
        ee_len, ee_hi, ee_lo = struct.unpack_from('<HHI', inode_data, base + 4)
        start = (ee_hi << 32) | ee_lo
        # Cap at 8 blocks per directory.
        blocks.extend(start + b for b in range(min(ee_len, 8)))
    return blocks
# Top-level driver: walk inode tables through the virtual->physical mapping,
# find directory inodes, and scan their data blocks for TARGETS dirents.
results = {}
print('Scanning inode tables directly...')
print(f'Groups to scan: {MIN_GROUP} to {NUM_GROUPS-1}')
print()
with open('/dev/sda', 'rb', buffering=0) as f:
    for group in range(MIN_GROUP, NUM_GROUPS):
        # Inode table for group N is at block 1070 + N*512
        it_block = 1070 + group * 512
        it_virt = it_block * BSIZE
        # Read entire inode table for this group
        inode_table = read_virt(f, it_virt, IPG * INODE_SZ)
        for idx in range(IPG):
            inode_data = inode_table[idx*INODE_SZ:(idx+1)*INODE_SZ]
            # Skip all-zero (unused) inode slots cheaply.
            if not any(inode_data):
                continue
            mode = struct.unpack_from('<H', inode_data, 0)[0]
            links = struct.unpack_from('<H', inode_data, 26)[0]
            size = struct.unpack_from('<I', inode_data, 4)[0]  # unused below
            # Check if directory: mode & 0xf000 == 0x4000
            if (mode & 0xf000) != 0x4000:
                continue
            if links < 2:
                continue
            inode_num = group * IPG + idx + 1
            # Read directory data blocks and scan for targets
            blocks = parse_extents(inode_data)
            for blk in blocks:
                blk_virt = blk * BSIZE
                blk_data = read_virt(f, blk_virt, BSIZE)
                for target in TARGETS:
                    # Fast substring pre-filter before structural checks.
                    if target not in blk_data:
                        continue
                    for off in range(0, BSIZE-8):
                        if blk_data[off+8:off+8+len(target)] != target:
                            continue
                        if is_valid_dirent(blk_data, off, target):
                            child_ino = struct.unpack_from('<I',blk_data,off)[0]
                            ftype = blk_data[off+7]
                            child_grp = (child_ino-1)//IPG
                            key = (target.decode(), child_ino)
                            if key not in results:
                                results[key] = (inode_num, ftype, child_grp)
                                status = 'INTACT' if child_grp>=13 else 'LOST'
                                tname = {1:'file',2:'dir',7:'link'}.get(ftype,'?')
                                print(f'[{status}] {target.decode()!r:15s} '
                                      f'child_inode={child_ino:10d} '
                                      f'parent_inode={inode_num:10d} '
                                      f'type={tname}')
        if group % 1000 == 0:
            print(f' Group {group}/{NUM_GROUPS}...', flush=True)
print()
print('=== SUMMARY ===')
for (name, child_ino), (parent_ino, ftype, grp) in sorted(results.items()):
    status = 'INTACT' if grp >= 13 else 'LOST'
    tname = {1:'file',2:'dir',7:'link'}.get(ftype,'?')
    print(f'[{status}] {name!r:15s} '
          f'child={child_ino} parent={parent_ino} type={tname}')

12
test/setup.sh Normal file
View File

@@ -0,0 +1,12 @@
# One-time environment setup for the recovery workflow.
apt update
# NBD tools, forensic utilities, and python deps
apt install -y nbd-server nbd-client python3-libnbd testdisk sleuthkit python3-crcmod
mkdir /mnt/recovered
# Stop any auto-assembled arrays before rebuilding by hand
mdadm --stop /dev/md126
mdadm --stop /dev/md127
# --build assembles a metadata-less RAID0 over the four members (64 KiB chunks)
mdadm --build /dev/md0 --level=0 --raid-devices=4 \
    --chunk=64 /dev/sda /dev/sde /dev/sdd /dev/sdc
python3.12 build_merged.py
# Serve the merged image over NBD and attach it as /dev/nbd0
python3.12 nbd_server_v9.py &
nbd-client 127.0.0.1 10809 /dev/nbd0 -N ""

25
test/test.py Normal file
View File

@@ -0,0 +1,25 @@
# Run this standalone first to verify the block is readable and contains dir entries
import struct
BLOCK = 4096
DEV = '/dev/dm-0'
phys = 3153952 # from first debug inode
with open(DEV, 'rb') as f:
f.seek(phys * BLOCK)
data = f.read(BLOCK)
print(f"Read {len(data)} bytes")
print(f"First 32 bytes: {data[:32].hex()}")
# Try parsing as dir entries
offset = 0
while offset < 128:
ino, rec_len, name_len, ftype = struct.unpack_from('<IHBB', data, offset)
print(f" offset={offset}: inode={ino} rec_len={rec_len} name_len={name_len} ftype={ftype}")
if rec_len < 8:
break
if name_len > 0:
name = data[offset+8:offset+8+name_len].decode('utf-8', errors='replace')
print(f" name='{name}'")
offset += rec_len

52
test/testoffset.sh Normal file
View File

@@ -0,0 +1,52 @@
# Sanity-check the NBD device alignment: block 0 contents, superblock magic,
# and whether the GDT lives at block 1 (byte 4096) or block 2 (byte 8192).
python3 -c "
import struct
# Read what's actually at the start of our NBD device
with open('/dev/nbd0','rb') as f:
    # Block 0 (should be boot block, all zeros for non-bootable)
    block0 = f.read(4096)
    # Superblock at byte 1024
    f.seek(1024)
    sb = f.read(256)
# Check block 0
nonzero = sum(1 for b in block0 if b != 0)
print(f'Block 0 non-zero bytes: {nonzero} (should be 0 for ext4)')
print(f'Block 0 first 16: {block0[:16].hex()}')
# Check superblock
magic = struct.unpack_from('<H', sb, 56)[0]
uuid = sb[104:120].hex()
first_data_block = struct.unpack_from('<I', sb, 20)[0]
blocks_per_group = struct.unpack_from('<I', sb, 40)[0]
print(f'SB magic: 0x{magic:04x} (want 0xef53)')
print(f'SB uuid: {uuid}')
print(f'first_data_block: {first_data_block} (0 for bsize>1024, 1 for bsize=1024)')
print(f'blocks_per_group: {blocks_per_group}')
# The GDT should be at block 1 = byte 4096
# But if first_data_block=1, GDT is at block 2 = byte 8192
f2 = open('/dev/nbd0','rb')
f2.seek(4096)
gdt0 = f2.read(64)
bb = struct.unpack_from('<I',gdt0,0)[0]
ib = struct.unpack_from('<I',gdt0,4)[0]
it = struct.unpack_from('<I',gdt0,8)[0]
cs = struct.unpack_from('<H',gdt0,30)[0]
print(f'At byte 4096 (block 1): bb={bb} ib={ib} it={it} csum=0x{cs:04x}')
f2.seek(8192)
gdt0b = f2.read(64)
bb = struct.unpack_from('<I',gdt0b,0)[0]
ib = struct.unpack_from('<I',gdt0b,4)[0]
it = struct.unpack_from('<I',gdt0b,8)[0]
cs = struct.unpack_from('<H',gdt0b,30)[0]
print(f'At byte 8192 (block 2): bb={bb} ib={ib} it={it} csum=0x{cs:04x}')
# Check what libext2fs would see as the device size
import os
f2.seek(0,2)
size = f2.tell()
print(f'NBD device size: {size} bytes = {size//4096} blocks')
f2.close()
"

25
test/testsize.sh Normal file
View File

@@ -0,0 +1,25 @@
# Compare the kernel's idea of the NBD device size against the ext4
# superblock's block count; they should agree exactly.
# What size does the kernel think nbd0 is?
blockdev --getsize64 /dev/nbd0
blockdev --getsz /dev/nbd0
# What does the superblock say?
python3 -c "
import struct
with open('/dev/nbd0','rb') as f:
    f.seek(1024)
    sb = f.read(256)
blocks_lo = struct.unpack_from('<I',sb,4)[0]
blocks_hi = struct.unpack_from('<I',sb,336)[0]
total = (blocks_hi<<32)|blocks_lo
bsize = 4096
print(f'SB block count: {total}')
print(f'SB filesystem size: {total*bsize} bytes')
print(f'NBD device size: ', end='')
import subprocess
r = subprocess.run(['blockdev','--getsize64','/dev/nbd0'],
    capture_output=True,text=True)
nbd_size = int(r.stdout.strip())
print(nbd_size)
print(f'Match: {total*bsize == nbd_size}')
print(f'Difference: {abs(total*bsize - nbd_size)} bytes')
"

47713
test/tree.txt Normal file

File diff suppressed because it is too large Load Diff

49802
test/true_roots.txt Normal file

File diff suppressed because it is too large Load Diff

60
test/tt.sh Normal file
View File

@@ -0,0 +1,60 @@
# Compare the primary ext4 group-descriptor table (GDT) against the backup
# copy stored in block group 1, reading both straight from /dev/md0 through
# the damaged layout: each 5-chunk group is 4 data chunks + 1 lost metadata
# chunk (see aa.sh for the same mapping).
python3 -c "
import struct
CHUNK = 128*512        # 64KB chunk
LV_START = 5120000*512 # physical byte on /dev/md0 where the LV data begins
BSIZE = 4096           # ext4 block size
GDT_ENTRY = 64         # 64-byte descriptors (64bit feature)
BPG = 32768            # ext4 blocks per group
def raw_read(virt_offset, length):
    # Read length bytes at filesystem-virtual virt_offset, translated
    # through the chunk map. Chunk index 4 of every 5-chunk group held
    # metadata that is gone; those ranges stay as zeros in the result.
    result = bytearray(length)
    pos = virt_offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK)       # which 5-chunk group
            in_group = pos % (5*CHUNK)
            chunk_idx = in_group // CHUNK  # 0-3 = data, 4 = lost metadata
            intra = in_group % CHUNK
            seg_len = min(CHUNK-intra, remaining)  # stay inside one chunk
            dst_off = pos - virt_offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK + chunk_idx*CHUNK + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
# Read primary GDT (at block 1 = byte 4096)
# and backup GDT (at block group 1 start + 1 block)
# Group 1 starts at block 32768, GDT backup at block 32769 = byte 32769*4096
# Read 1000 entries from primary GDT
primary_gdt = bytearray(raw_read(BSIZE, 1000 * GDT_ENTRY))
# Read 1000 entries from backup GDT at group 1
backup_start = (BPG + 1) * BSIZE # block 32769
backup_gdt = raw_read(backup_start, 1000 * GDT_ENTRY)
print('Comparing primary vs backup GDT entries:')
mismatches = 0
zeros = 0
for i in range(1000):
    p = primary_gdt[i*GDT_ENTRY:(i+1)*GDT_ENTRY]
    b = backup_gdt[i*GDT_ENTRY:(i+1)*GDT_ENTRY]
    p_bb = struct.unpack_from('<I',p,0)[0]  # bg_block_bitmap_lo
    b_bb = struct.unpack_from('<I',b,0)[0]
    if p == bytes(GDT_ENTRY):
        zeros += 1     # primary entry fell inside a lost metadata chunk
    elif p != b:
        mismatches += 1
        if mismatches <= 5:
            print(f' Group {i}: primary bb={p_bb} vs backup bb={b_bb}')
print(f'Zero entries in primary: {zeros}/1000')
print(f'Mismatches: {mismatches}/1000')
print(f'Backup GDT group 0: bb={struct.unpack_from(\"<I\",backup_gdt,0)[0]}')
"

51
test/uu.sh Normal file
View File

@@ -0,0 +1,51 @@
# Extract the complete backup group-descriptor table from block group 1 of
# the damaged filesystem on /dev/md0, sanity-check a few entries, and save
# it to /tmp/backup_gdt.bin for later repair work.
python3 -c "
import struct
CHUNK = 128*512        # 64KB chunk
LV_START = 5120000*512 # physical byte on /dev/md0 where the LV data begins
BSIZE = 4096           # ext4 block size
GDT_ENTRY = 64         # 64-byte descriptors (64bit feature)
BPG = 32768            # ext4 blocks per group
NUM_GROUPS = 35728     # total block groups in the filesystem
def raw_read(virt_offset, length):
    # Read length bytes at filesystem-virtual virt_offset, translated
    # through the chunk map. Chunk index 4 of every 5-chunk group held
    # metadata that is gone; those ranges stay as zeros in the result.
    result = bytearray(length)
    pos = virt_offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK)       # which 5-chunk group
            in_group = pos % (5*CHUNK)
            chunk_idx = in_group // CHUNK  # 0-3 = data, 4 = lost metadata
            intra = in_group % CHUNK
            seg_len = min(CHUNK-intra, remaining)  # stay inside one chunk
            dst_off = pos - virt_offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK + chunk_idx*CHUNK + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
# Read full backup GDT from group 1 (block 32769)
backup_start_virt = (BPG + 1) * BSIZE
gdt_size = NUM_GROUPS * GDT_ENTRY
print(f'Reading backup GDT: {gdt_size//1024}KB from virtual byte {backup_start_virt}')
backup_gdt = raw_read(backup_start_virt, gdt_size)
# Verify it looks sane
for i in [0,1,2,100,1000,10000]:
    e = backup_gdt[i*GDT_ENTRY:(i+1)*GDT_ENTRY]
    bb = struct.unpack_from('<I',e,0)[0]   # bg_block_bitmap_lo
    ib = struct.unpack_from('<I',e,4)[0]   # bg_inode_bitmap_lo
    it = struct.unpack_from('<I',e,8)[0]   # bg_inode_table_lo
    cs = struct.unpack_from('<H',e,30)[0]  # bg_checksum
    print(f' Backup group {i}: bb={bb} ib={ib} it={it} csum=0x{cs:04x}')
# Save it
with open('/tmp/backup_gdt.bin','wb') as f:
    f.write(backup_gdt)
print(f'Saved {len(backup_gdt)} bytes to /tmp/backup_gdt.bin')
"

81
test/vv.sh Normal file
View File

@@ -0,0 +1,81 @@
# Read the backup GDT from block group 1 on the damaged /dev/md0, recompute
# the per-group descriptor checksum for every entry, and write the patched
# table to /tmp/patched_gdt.bin.
#
# BUGFIXES vs previous version of this script:
#  - the group number in the checksum must be packed as le32, not le16
#  - the seed is crc32c(~0, s_uuid) (metadata_csum); the old code fed the
#    uuid AND a value read from sb offset 408, which is s_snapshot_inum,
#    not a checksum seed (s_checksum_seed lives at 0x270 and is only valid
#    with the csum_seed incompat feature)
#  - crcmod's predefined crc-32c applies init/final XOR 0xFFFFFFFF; the
#    kernel's seeded crc32c does not, so chained calls must undo it
python3 -c "
import struct
CHUNK = 128*512        # 64KB chunk
LV_START = 5120000*512 # physical byte on /dev/md0 where the LV data begins
BSIZE = 4096
GDT_ENTRY = 64
BPG = 32768
NUM_GROUPS = 35728
def raw_read(virt_offset, length):
    # Read length bytes at filesystem-virtual virt_offset through the
    # 4-data + 1-lost-metadata chunk map; lost ranges read as zeros.
    result = bytearray(length)
    pos = virt_offset
    remaining = length
    with open('/dev/md0','rb') as f:
        while remaining > 0:
            group = pos // (5*CHUNK)
            in_group = pos % (5*CHUNK)
            chunk_idx = in_group // CHUNK
            intra = in_group % CHUNK
            seg_len = min(CHUNK-intra, remaining)
            dst_off = pos - virt_offset
            if chunk_idx != 4:
                phys = LV_START + group*4*CHUNK + chunk_idx*CHUNK + intra
                f.seek(phys)
                data = f.read(seg_len)
                result[dst_off:dst_off+len(data)] = data
            pos += seg_len
            remaining -= seg_len
    return bytes(result)
# Superblock fields needed for the descriptor checksum
sb = raw_read(1024, 1024)
uuid = sb[104:120]                              # s_uuid at 0x68
incompat = struct.unpack_from('<I', sb, 96)[0]  # s_feature_incompat at 0x60
print(f'UUID: {uuid.hex()}')
# Kernel-flavour crc32c: seeded, NO init/final XOR
try:
    import crcmod
    _fn = crcmod.predefined.mkCrcFun('crc-32c')  # init=0, xorOut=0xFFFFFFFF
    def crc32c(data, seed):
        # undo crcmod's xorOut on the way in and out -> raw seeded crc32c
        return _fn(data, seed ^ 0xFFFFFFFF) ^ 0xFFFFFFFF
except ImportError:
    print('WARNING: no crc32c available, checksums will be wrong')
    def crc32c(data, seed):
        return 0
if incompat & 0x2000:                            # INCOMPAT_CSUM_SEED
    seed = struct.unpack_from('<I', sb, 624)[0]  # s_checksum_seed at 0x270
else:
    seed = crc32c(uuid, 0xFFFFFFFF)              # crc32c(~0, uuid)
print(f'csum seed: 0x{seed:08x}')
# Read backup GDT from group 1 and re-checksum every descriptor
backup_gdt = bytearray(raw_read((BPG+1)*BSIZE, NUM_GROUPS * GDT_ENTRY))
print(f'Patching checksums for {NUM_GROUPS} groups...')
for g in range(NUM_GROUPS):
    entry = bytearray(backup_gdt[g*GDT_ENTRY:(g+1)*GDT_ENTRY])
    # Zero bg_checksum (offset 30) - equivalent to the kernel crc-ing a
    # zero dummy in place of the field
    struct.pack_into('<H', entry, 30, 0)
    # csum = low 16 bits of crc32c(seed, le32 group ++ descriptor)
    csum_data = struct.pack('<I', g) + bytes(entry)
    csum = crc32c(csum_data, seed) & 0xFFFF
    struct.pack_into('<H', entry, 30, csum)
    backup_gdt[g*GDT_ENTRY:(g+1)*GDT_ENTRY] = entry
with open('/tmp/patched_gdt.bin','wb') as f:
    f.write(backup_gdt)
print(f'Written /tmp/patched_gdt.bin ({len(backup_gdt)//1024}KB)')
"

71
test/zz.sh Normal file
View File

@@ -0,0 +1,71 @@
# Pure arithmetic, no device access: for each candidate GDT copy, work out
# which group-descriptor entries land in a lost metadata chunk, then search
# the sparse_super backup locations for a copy with zero conflicts.
python3 -c "
CHUNK = 128*512
LV_START = 5120000*512
BSIZE = 4096
BPG = 32768
NUM_GROUPS = 35728
GDT_ENTRY = 64
# A GDT copy starting at virtual byte start loses every entry whose byte
# falls in chunk index 4 of a 5-chunk group (the lost metadata chunk).
def lost_entries(start):
    return [g for g in range(NUM_GROUPS)
            if ((start + g * GDT_ENTRY) % (5*CHUNK)) // CHUNK == 4]
def report(bad):
    print(f' Groups in metadata chunks: {len(bad)}')
    print(f' First few: {bad[:10]}')
    print(f' Last few: {bad[-5:]}')
print('Primary GDT (virtual byte 4096):')
prim_bad = lost_entries(BSIZE)
report(prim_bad)
print()
print('Backup GDT (virtual byte (BPG+1)*BSIZE):')
backup_start = (BPG+1) * BSIZE
backup_bad = lost_entries(backup_start)
report(backup_bad)
# Do the two copies lose the same descriptors?
overlap = set(prim_bad) & set(backup_bad)
print(f' Overlap with primary bad: {len(overlap)}')
print()
print('Searching for backup GDT with no metadata chunk conflicts...')
# sparse_super keeps backups in groups 0, 1 and powers of 3, 5 and 7
def has_backup(g):
    if g <= 1:
        return True
    for base in (3, 5, 7):
        n = base
        while n < g:
            n *= base
        if n == g:
            return True
    return False
backup_groups = [g for g in range(1000) if has_backup(g) and g > 0]
for bg in backup_groups[:20]:
    start = (bg * BPG + 1) * BSIZE  # backup GDT = first block after the sb
    bad = lost_entries(start)
    print(f' Backup at group {bg}: {len(bad)} bad entries '
          f'({\"CLEAN\" if len(bad)==0 else bad[:3]})')
    if len(bad) == 0:
        print(f' *** PERFECT BACKUP GDT at group {bg} ***')
        print(f' Virtual byte: {start}')
        break
"