Initial remote commit

This commit is contained in:
2026-04-30 11:04:05 +00:00
commit b86e4f9a98
103 changed files with 262770 additions and 0 deletions

306
test/rebuild.py Normal file
View File

@@ -0,0 +1,306 @@
#!/usr/bin/env python3
"""
Full filesystem extraction using TSK ils + fls + icat.
Strategy:
1. ils - get every allocated inode
2. For each inode, determine if file or directory
3. Build directory tree bottom-up using parent pointers (..)
4. Extract everything, place orphans in /orphans/<inode>
"""
import subprocess, os, sys, struct, collections
# Device path handed to every ils / fls / icat invocation in this script.
DEVICE = '/dev/nbd0'
# Root directory under which the recovered tree (and orphans/) is written.
OUTDIR = '/mnt/recovered'
# Inodes per block group: the group index of an inode is (inode - 1) // IPG.
# NOTE(review): 8192 is hard-coded here — confirm it matches the filesystem's
# superblock before running against a different image.
IPG = 8192
# Inodes whose group index is below this value are skipped everywhere.
MIN_GOOD_GROUP = 13 # groups 0-12 are zeroed
def run(cmd, timeout=600):
    """Run *cmd* and return its output as a ``(stdout, stderr)`` pair of str.

    The call is best-effort and never raises for ordinary failures:

    * on timeout the result is ``('', 'timeout')``;
    * on any other error (missing binary, permission problem, ...) the
      result is ``('', str(error))``.

    Args:
        cmd: Argument list passed to ``subprocess.run`` (no shell).
        timeout: Seconds to wait before giving up on the child process.
    """
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Output of the TSK tools contains raw file names, which are not
            # guaranteed to be valid in the locale encoding.  Strict decoding
            # would raise UnicodeDecodeError and the handler below would then
            # discard the *entire* output.  surrogateescape keeps every byte
            # and round-trips through os.fsencode() when used as a path.
            errors='surrogateescape',
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return '', 'timeout'
    except Exception as exc:
        return '', str(exc)
    return proc.stdout, proc.stderr
def run_binary(cmd, timeout=600):
    """Run *cmd* and return its raw stdout as bytes.

    Best-effort: any failure to launch or finish the command (including a
    timeout) yields ``b''`` instead of raising.

    Args:
        cmd: Argument list passed to ``subprocess.run`` (no shell).
        timeout: Seconds to wait before giving up on the child process.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=timeout)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt / SystemExit must still
        # propagate so a long recovery run can be aborted with Ctrl-C.
        return b''
    return proc.stdout
# ── Phase 1: get all allocated inodes via ils ─────────────────────────────────
# File-type bits (mode & 0o170000) -> one-letter type code used by this script.
_ILS_TYPE_BY_MODE = {
    0o040000: 'd',  # directory
    0o100000: 'f',  # regular file
    0o120000: 'l',  # symbolic link
}


def _parse_ils_line(line):
    """Parse one ``ils -e`` record; return ``(inode, info)`` or ``None`` to skip.

    Expected layout (pipe separated):
    inode|alloc|uid|gid|mtime|atime|ctime|dtime|mode|nlink|size|...
    """
    if not line.strip() or line.startswith('|'):
        return None
    fields = line.split('|')
    try:
        ino = int(fields[0])
        alloc = fields[1]  # 'a' = allocated, 'f' = free
        mtime = int(fields[4]) if len(fields) > 4 else 0
        # NOTE(review): mode is parsed as base-10, exactly as before.  TSK may
        # print st_mode in octal — confirm against real `ils -e` output before
        # trusting the type classification below.
        mode = int(fields[8]) if len(fields) > 8 else 0
        size = int(fields[10]) if len(fields) > 10 else 0
    except (ValueError, IndexError):
        # Header lines and malformed records are not inode rows.
        return None
    if alloc != 'a' or ino <= 11:
        return None
    if (ino - 1) // IPG < MIN_GOOD_GROUP:
        return None
    kind = _ILS_TYPE_BY_MODE.get(mode & 0o170000, 'o')  # 'o' = other
    return ino, {'type': kind, 'size': size, 'mtime': mtime}


def get_all_inodes():
    """Enumerate allocated inodes on DEVICE with ``ils -e``.

    Only records that are allocated, have an inode number above 11 and live
    in a block group >= MIN_GOOD_GROUP are kept.

    Returns:
        dict mapping inode number -> ``{'type', 'size', 'mtime'}`` where
        ``type`` is 'd', 'f', 'l' or 'o'.  Empty if ils produced nothing
        usable.
    """
    print('Running ils to enumerate all allocated inodes...')
    print('(This may take 30-60 minutes for a 4.4TB filesystem)')
    stdout, stderr = run(['ils', '-e', DEVICE], timeout=7200)
    if not stdout.strip() and stderr:
        # Surface the real reason (timeout, missing binary, ils error text)
        # instead of letting the caller see only an empty result.
        print(f'WARNING: ils produced no output: {stderr.strip()}',
              file=sys.stderr)

    inodes = {}
    for line in stdout.splitlines():
        parsed = _parse_ils_line(line)
        if parsed is not None:
            ino, info = parsed
            inodes[ino] = info

    by_type = collections.Counter(info['type'] for info in inodes.values())
    print(f'Found {len(inodes)} allocated inodes in intact groups')
    print(f' Directories: {by_type["d"]}')
    print(f' Files: {by_type["f"]}')
    return inodes
# ── Phase 2: build directory tree using fls ───────────────────────────────────
def build_tree(dir_inodes):
    """List every directory inode with ``fls`` and record its links.

    Args:
        dir_inodes: Sized iterable of directory inode numbers.

    Returns:
        ``(inode_path, inode_children)`` where

        * ``inode_path`` maps a directory inode to its parent inode, taken
          from the directory's ".." entry (absent if no ".." was listed);
        * ``inode_children`` is a ``defaultdict(list)`` mapping a directory
          inode to ``(child_inode, name, type_char)`` tuples, ``type_char``
          being the first character of the fls type column.
    """
    print(f'\nBuilding directory tree from {len(dir_inodes)} directory inodes...')
    inode_path = {}
    inode_children = collections.defaultdict(list)
    total = len(dir_inodes)

    for processed, dir_ino in enumerate(dir_inodes, start=1):
        # -a is required: by default fls does not print the "." and ".."
        # entries, and ".." is the only source of the parent pointer.
        stdout, _ = run(['fls', '-a', DEVICE, str(dir_ino)], timeout=30)
        parent_ino = None
        for line in stdout.splitlines():
            parts = line.split(None, 2)
            if len(parts) < 3:
                continue
            type_str, ino_str, name = parts
            try:
                ino = int(ino_str.rstrip(':').lstrip('*'))
            except ValueError:
                # Second column is not a plain inode number (e.g. a lone
                # '*' marker) — skip the entry.
                continue
            name = name.strip()
            if name == '..':
                parent_ino = ino
                continue
            if name == '.':
                continue
            inode_children[dir_ino].append((ino, name, type_str[0]))

        if parent_ino is not None:
            inode_path[dir_ino] = parent_ino
        if processed % 1000 == 0:
            print(f' Processed {processed}/{total} directories...',
                  flush=True)

    return inode_path, inode_children
# ── Phase 3: resolve paths ────────────────────────────────────────────────────
def resolve_paths(inode_path, inode_children, all_inodes):
    """Turn parent pointers into relative output paths for directories.

    A directory becomes an orphan root ``orphans/dir_<inode>`` when its
    parent is unknown, is itself, lies in a block group below
    MIN_GOOD_GROUP, or when following parents loops back onto the chain.
    Every other directory is placed under its parent's path using the name
    the parent lists for it (``inode_<inode>`` if the parent has no entry).

    The walk is iterative with real cycle detection, so arbitrarily deep
    trees resolve the same way regardless of iteration order.

    Args:
        inode_path: dict of directory inode -> parent inode.
        inode_children: dict of directory inode ->
            list of ``(child_inode, name, type_char)``.
        all_inodes: Unused; kept so existing callers keep working.

    Returns:
        dict of inode -> path string relative to the output root.
    """
    print('\nResolving full paths...')

    # (parent, child) -> first name the parent lists for that child.  Only
    # directories that take part in path resolution need an entry.
    child_names = {}
    for parent, entries in inode_children.items():
        for child_ino, child_name, _ in entries:
            if child_ino in inode_path:
                child_names.setdefault((parent, child_ino), child_name)

    def is_orphan_root(ino):
        parent = inode_path.get(ino)
        if parent is None or parent == ino:
            return True  # root or unknown parent
        # Parent lives in a lost group: its listing (and our name) is gone.
        return (parent - 1) // IPG < MIN_GOOD_GROUP

    resolved = {}
    for start in inode_path:
        chain = []      # unresolved inodes from `start` upwards
        on_chain = set()
        ino = start
        while ino not in resolved:
            if ino in on_chain or is_orphan_root(ino):
                # Either a genuine orphan root or the point where a parent
                # loop closes; anchor the chain here.
                resolved[ino] = f'orphans/dir_{ino}'
                break
            chain.append(ino)
            on_chain.add(ino)
            ino = inode_path[ino]

        # Unwind top-down so each parent is resolved before its child.
        for child in reversed(chain):
            if child in resolved:
                continue  # this inode was chosen as the loop anchor
            parent = inode_path[child]
            name = child_names.get((parent, child), f'inode_{child}')
            resolved[child] = os.path.join(resolved[parent], name)

    return resolved
# ── Phase 4: extract ──────────────────────────────────────────────────────────
def _safe_name(name):
    """Return *name* made safe as a single path component, or None.

    Names come from a damaged filesystem; a '/' or a bare '.'/'..' would let
    an entry escape the directory it is being written into.
    """
    cleaned = name.replace('/', '_')
    if cleaned in ('', '.', '..'):
        return None
    return cleaned


def _remove_if_empty(path):
    """Delete *path* if it is a zero-length regular file; ignore errors."""
    try:
        if os.path.isfile(path) and os.path.getsize(path) == 0:
            os.remove(path)
    except OSError:
        pass


def _icat_to_file(ino, outpath, timeout=600):
    """Dump inode *ino* into *outpath* with icat.

    Returns the number of bytes written, or None if icat could not be run,
    timed out or exited non-zero.  Partial data is kept (it may be all that
    is recoverable); an empty leftover file is removed.
    """
    try:
        with open(outpath, 'wb') as sink:
            proc = subprocess.run(
                ['icat', DEVICE, str(ino)],
                stdout=sink, stderr=subprocess.DEVNULL,
                timeout=timeout,
            )
        if proc.returncode == 0:
            return os.path.getsize(outpath)
    except Exception:
        # Best-effort per file; Ctrl-C / SystemExit still propagate.
        pass
    _remove_if_empty(outpath)
    return None


def _restore_symlink(ino, outpath):
    """Recreate the symlink stored in inode *ino* at *outpath*.

    Does nothing if icat returns an empty target.  Raises on failure.
    """
    proc = subprocess.run(
        ['icat', DEVICE, str(ino)],
        capture_output=True, timeout=10
    )
    # fsdecode keeps non-UTF-8 bytes of the target intact.
    target = os.fsdecode(proc.stdout).strip()
    if not target:
        return
    if os.path.lexists(outpath):
        os.remove(outpath)
    os.symlink(target, outpath)


def extract_all(resolved_dirs, inode_children, all_inodes):
    """Extract regular files and symlinks into OUTDIR.

    Pass 1 walks every resolved directory and extracts its 'r' (regular)
    and 'l' (symlink) entries under their names.  Pass 2 dumps every
    non-empty regular-file inode from *all_inodes* that pass 1 did not
    extract into ``orphans/files/<inode>``.

    Args:
        resolved_dirs: dict of directory inode -> path relative to OUTDIR.
        inode_children: dict of directory inode ->
            list of ``(child_inode, name, type_char)``.
        all_inodes: dict of inode -> ``{'type', 'size', ...}``.

    Returns:
        dict with counters ``ok``, ``err`` and ``bytes``.
    """
    print(f'\nExtracting files...')
    stats = {'ok': 0, 'err': 0, 'bytes': 0}
    extracted = set()

    # Pass 1: files reachable from the directory tree.
    for dir_ino, dir_path in resolved_dirs.items():
        abs_dir = os.path.join(OUTDIR, dir_path)
        os.makedirs(abs_dir, exist_ok=True)
        for child_ino, name, etype in inode_children.get(dir_ino, []):
            if child_ino in extracted or etype not in ('r', 'l'):
                continue
            safe = _safe_name(name)
            if safe is None:
                stats['err'] += 1
                continue
            outpath = os.path.join(abs_dir, safe)

            if etype == 'r':
                size = _icat_to_file(child_ino, outpath)
                if size is None:
                    stats['err'] += 1
                    continue
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(child_ino)
                if stats['ok'] % 100 == 0:
                    print(f' {stats["ok"]} files extracted, '
                          f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)
            else:
                try:
                    _restore_symlink(child_ino, outpath)
                except Exception:
                    stats['err'] += 1
                else:
                    extracted.add(child_ino)
                    stats['ok'] += 1

    # Pass 2: allocated regular files that no directory entry led to.
    print(f'\nExtracting orphaned files...')
    orphan_dir = os.path.join(OUTDIR, 'orphans', 'files')
    os.makedirs(orphan_dir, exist_ok=True)
    for ino, info in all_inodes.items():
        if ino in extracted or info['type'] != 'f' or info['size'] == 0:
            continue
        outpath = os.path.join(orphan_dir, str(ino))
        size = _icat_to_file(ino, outpath)
        if size is None:
            stats['err'] += 1
        elif size > 0:
            stats['ok'] += 1
            stats['bytes'] += size
            extracted.add(ino)
        else:
            # icat succeeded but produced nothing: not worth keeping.
            _remove_if_empty(outpath)

    return stats
def main():
    """Run the four recovery phases in order and print a summary.

    Exits with status 1 if phase 1 (ils enumeration) yields no inodes.
    """
    os.makedirs(OUTDIR, exist_ok=True)
    for banner in (f'Device : {DEVICE}', f'Output : {OUTDIR}', ''):
        print(banner)

    # Phase 1: enumerate all inodes
    inode_table = get_all_inodes()
    if not inode_table:
        print('ERROR: ils returned no inodes - is NBD server running?')
        sys.exit(1)

    # Phase 2: build tree from every inode classified as a directory
    directories = []
    for number, meta in inode_table.items():
        if meta['type'] == 'd':
            directories.append(number)
    parent_of, children_of = build_tree(directories)

    # Phase 3: resolve paths
    dir_paths = resolve_paths(parent_of, children_of, inode_table)
    orphan_count = 0
    for rel_path in dir_paths.values():
        if rel_path.startswith('orphans'):
            orphan_count += 1
    intact_count = len(dir_paths) - orphan_count
    print(f'Directories with resolved paths: {intact_count}')
    print(f'Orphaned directories: {orphan_count}')

    # Phase 4: extract
    totals = extract_all(dir_paths, children_of, inode_table)
    gigabytes = totals["bytes"] / 1024**3
    print()
    print('=== COMPLETE ===')
    print(f'Files OK: {totals["ok"]}')
    print(f'Files ERR: {totals["err"]}')
    print(f'Total data: {gigabytes:.2f} GB')
    print(f'Output: {OUTDIR}')


# Only run the recovery when executed as a script, not when imported.
if __name__ == '__main__':
    main()