228 lines · 8.1 KiB · Bash
cat > /tmp/reconstruct_tree.py << 'EOF'
|
|
#!/usr/bin/env python3
|
|
"""
|
|
Reconstruct full directory tree from inode tables.
|
|
Attaches orphaned subtrees to lost+found.
|
|
Extracts everything using icat/debugfs.
|
|
"""
|
|
import struct, os, subprocess, collections
|
|
|
|
DEVICE = '/dev/nbd0'
|
|
OUTDIR = '/mnt/recovered/reconstructed'
|
|
BSIZE = 4096
|
|
IPG = 8192
|
|
INODE_SZ = 256
|
|
NUM_GROUPS = 35728
|
|
MIN_GROUP = 13
|
|
|
|
# inode -> parent_inode (from .. entry)
|
|
parent_of = {}
|
|
# inode -> [(child_inode, name, ftype)]
|
|
children = collections.defaultdict(list)
|
|
# inode -> name (as seen from parent's directory block)
|
|
inode_name = {}
|
|
# all directory inodes found
|
|
dir_inodes = set()
|
|
|
|
def parse_extents(inode_data):
|
|
blocks = []
|
|
magic = struct.unpack_from('<H', inode_data, 40)[0]
|
|
if magic != 0xf30a: return blocks
|
|
entries = struct.unpack_from('<H', inode_data, 42)[0]
|
|
depth = struct.unpack_from('<H', inode_data, 46)[0]
|
|
if depth == 0:
|
|
for i in range(min(entries, 4)):
|
|
off = 52 + i*12
|
|
ee_len = struct.unpack_from('<H', inode_data, off+4)[0]
|
|
ee_hi = struct.unpack_from('<H', inode_data, off+6)[0]
|
|
ee_lo = struct.unpack_from('<I', inode_data, off+8)[0]
|
|
ee_start = (ee_hi<<32)|ee_lo
|
|
if ee_len > 1024: continue
|
|
for b in range(min(ee_len, 8)):
|
|
blocks.append(ee_start + b)
|
|
return blocks
|
|
|
|
def read_dirents(f, inode_data):
|
|
entries = []
|
|
for blk in parse_extents(inode_data):
|
|
try:
|
|
f.seek(blk * BSIZE)
|
|
data = f.read(BSIZE)
|
|
except OSError:
|
|
continue
|
|
off = 0
|
|
while off < BSIZE - 8:
|
|
ino = struct.unpack_from('<I', data, off)[0]
|
|
rec_len = struct.unpack_from('<H', data, off+4)[0]
|
|
name_len = data[off+6]
|
|
ftype = data[off+7]
|
|
if rec_len < 8: break
|
|
if ino > 0 and name_len > 0:
|
|
name = data[off+8:off+8+name_len].decode('utf-8',errors='replace')
|
|
entries.append((ino, name, ftype))
|
|
off += rec_len
|
|
return entries
|
|
|
|
# ── Phase 1: scan all inode tables ───────────────────────────────────────────
|
|
print('Phase 1: Scanning inode tables...')
|
|
with open(DEVICE, 'rb', buffering=0) as f:
|
|
for group in range(MIN_GROUP, NUM_GROUPS):
|
|
it_block = 1070 + group * 512
|
|
try:
|
|
f.seek(it_block * BSIZE)
|
|
inode_table = f.read(IPG * INODE_SZ)
|
|
except OSError:
|
|
continue
|
|
|
|
for idx in range(IPG):
|
|
inode_data = inode_table[idx*INODE_SZ:(idx+1)*INODE_SZ]
|
|
if not any(inode_data): continue
|
|
|
|
mode = struct.unpack_from('<H', inode_data, 0)[0]
|
|
links = struct.unpack_from('<H', inode_data, 26)[0]
|
|
|
|
if (mode & 0xf000) != 0x4000: continue
|
|
if links < 2: continue
|
|
|
|
inode_num = group * IPG + idx + 1
|
|
dir_inodes.add(inode_num)
|
|
|
|
# Read directory entries to find parent and children
|
|
entries = read_dirents(f, inode_data)
|
|
for ino, name, ftype in entries:
|
|
if name == '..':
|
|
parent_of[inode_num] = ino
|
|
elif name != '.':
|
|
children[inode_num].append((ino, name, ftype))
|
|
inode_name[ino] = name
|
|
|
|
if group % 2000 == 0:
|
|
print(f' Group {group}/{NUM_GROUPS}: '
|
|
f'{len(dir_inodes)} dirs found...', flush=True)
|
|
|
|
print(f'Found {len(dir_inodes)} directory inodes')
|
|
print(f'Found {len(parent_of)} directories with known parents')
|
|
|
|
# ── Phase 2: find orphan roots ────────────────────────────────────────────────
|
|
print('\nPhase 2: Finding orphan roots...')
|
|
|
|
def resolve_path(inode, depth=0, visited=None):
|
|
if visited is None: visited = set()
|
|
if inode in visited: return None
|
|
visited.add(inode)
|
|
if depth > 50: return None
|
|
|
|
parent = parent_of.get(inode)
|
|
if parent is None:
|
|
return f'lost+found/unknown_{inode}'
|
|
|
|
grp = (parent-1) // IPG
|
|
if grp < MIN_GROUP:
|
|
# Parent is in zeroed region - this is an orphan root
|
|
name = inode_name.get(inode, f'inode_{inode}')
|
|
return f'lost+found/{name}_{inode}'
|
|
|
|
if parent not in dir_inodes:
|
|
# Parent not found in our scan
|
|
name = inode_name.get(inode, f'inode_{inode}')
|
|
return f'lost+found/{name}_{inode}'
|
|
|
|
parent_path = resolve_path(parent, depth+1, visited)
|
|
if parent_path is None:
|
|
return f'lost+found/inode_{inode}'
|
|
|
|
name = inode_name.get(inode, f'inode_{inode}')
|
|
return os.path.join(parent_path, name)
|
|
|
|
# Resolve paths for all directories
|
|
print('Resolving paths...')
|
|
resolved = {}
|
|
for ino in dir_inodes:
|
|
resolved[ino] = resolve_path(ino)
|
|
|
|
# Summary
|
|
in_lf = sum(1 for p in resolved.values() if p and p.startswith('lost+found/')
|
|
and p.count('/') == 1)
|
|
deep = sum(1 for p in resolved.values() if p and not p.startswith('lost+found'))
|
|
print(f'Orphan roots in lost+found: {in_lf}')
|
|
print(f'Dirs with resolved paths: {deep}')
|
|
|
|
# Show interesting paths
|
|
print('\nInteresting resolved paths:')
|
|
for ino, path in sorted(resolved.items(), key=lambda x: x[1] or ''):
|
|
if path and any(x in path for x in ['pterodactyl','docker','mysql',
|
|
'www','nginx','var','log']):
|
|
print(f' inode {ino:10d}: {path}')
|
|
|
|
# Save tree
|
|
with open('/tmp/resolved_tree.txt','w') as f:
|
|
for ino, path in sorted(resolved.items(), key=lambda x: x[1] or ''):
|
|
f.write(f'{ino}\t{path or "unknown"}\n')
|
|
print(f'\nSaved {len(resolved)} paths to /tmp/resolved_tree.txt')
|
|
|
|
# ── Phase 3: extract ──────────────────────────────────────────────────────────
|
|
print('\nPhase 3: Extracting...')
|
|
os.makedirs(OUTDIR, exist_ok=True)
|
|
|
|
stats = {'dirs':0, 'files_ok':0, 'files_err':0, 'bytes':0}
|
|
|
|
# Create all directories first
|
|
for ino, path in sorted(resolved.items(), key=lambda x: len(x[1] or '')):
|
|
if not path: continue
|
|
abs_path = os.path.join(OUTDIR, path)
|
|
os.makedirs(abs_path, exist_ok=True)
|
|
stats['dirs'] += 1
|
|
|
|
# Extract files in each directory
|
|
for dir_ino, path in resolved.items():
|
|
if not path: continue
|
|
abs_dir = os.path.join(OUTDIR, path)
|
|
|
|
for child_ino, name, ftype in children.get(dir_ino, []):
|
|
# Skip if it's a directory (already created)
|
|
if child_ino in dir_inodes: continue
|
|
|
|
outpath = os.path.join(abs_dir, name)
|
|
if ftype == 1: # regular file
|
|
try:
|
|
with open(outpath, 'wb') as out:
|
|
subprocess.run(
|
|
['icat', DEVICE, str(child_ino)],
|
|
stdout=out, stderr=subprocess.DEVNULL,
|
|
timeout=300
|
|
)
|
|
size = os.path.getsize(outpath)
|
|
stats['files_ok'] += 1
|
|
stats['bytes'] += size
|
|
if stats['files_ok'] % 500 == 0:
|
|
print(f' {stats["files_ok"]} files, '
|
|
f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)
|
|
except Exception as e:
|
|
stats['files_err'] += 1
|
|
|
|
elif ftype == 7: # symlink
|
|
try:
|
|
r = subprocess.run(
|
|
['icat', DEVICE, str(child_ino)],
|
|
capture_output=True, timeout=10
|
|
)
|
|
target = r.stdout.decode('utf-8',errors='replace').strip()
|
|
if target:
|
|
if os.path.lexists(outpath): os.remove(outpath)
|
|
os.symlink(target, outpath)
|
|
stats['files_ok'] += 1
|
|
except:
|
|
stats['files_err'] += 1
|
|
|
|
print()
|
|
print('=== COMPLETE ===')
|
|
print(f'Directories: {stats["dirs"]}')
|
|
print(f'Files OK: {stats["files_ok"]}')
|
|
print(f'Files ERR: {stats["files_err"]}')
|
|
print(f'Total data: {stats["bytes"]/1024**3:.2f}GB')
|
|
print(f'Output: {OUTDIR}')
|
|
|
|
EOF
|
|
|
|
python3 /tmp/reconstruct_tree.py 2>&1 | tee /tmp/reconstruct.log
|