Initial remote commit
This commit is contained in:
306
test/rebuild.py
Normal file
306
test/rebuild.py
Normal file
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Full filesystem extraction using TSK ils + fls + icat.
|
||||
|
||||
Strategy:
|
||||
1. ils - get every allocated inode
|
||||
2. For each inode, determine if file or directory
|
||||
3. Build directory tree bottom-up using parent pointers (..)
|
||||
4. Extract everything, place orphans in /orphans/<inode>
|
||||
"""
|
||||
import subprocess, os, sys, struct, collections
|
||||
|
||||
# Block device the TSK tools (ils / fls / icat) are pointed at.
DEVICE = '/dev/nbd0'
# Root directory that receives the recovered tree.
OUTDIR = '/mnt/recovered'
# Inodes per block group; used to map an inode number to its group index.
IPG = 8192
# First block group considered intact; inodes in lower groups are ignored.
MIN_GOOD_GROUP = 13  # groups 0-12 are zeroed
|
||||
|
||||
def run(cmd, timeout=600):
    """Run *cmd* and return its decoded ``(stdout, stderr)``.

    This is a best-effort wrapper that never raises:

    * on timeout it returns ``('', 'timeout')``;
    * on any other failure it returns ``('', str(exc))``.

    Args:
        cmd: Argument list passed to ``subprocess.run`` (no shell).
        timeout: Seconds to wait before giving up.

    Returns:
        A ``(stdout, stderr)`` tuple of ``str``.
    """
    try:
        # surrogateescape: tool output may contain file names that are not
        # valid in the locale encoding. Strict decoding would raise, and the
        # broad handler below would then throw away the *entire* output for
        # the sake of one bad byte. Escaped bytes round-trip back to the
        # original bytes when the string is later used as a filesystem path.
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors='surrogateescape',
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return '', 'timeout'
    except Exception as e:
        return '', str(e)
    return proc.stdout, proc.stderr
|
||||
|
||||
def run_binary(cmd, timeout=600):
    """Run *cmd* and return its raw stdout as ``bytes``.

    Best-effort: any failure to run the command (including a timeout) yields
    ``b''`` instead of raising.

    Args:
        cmd: Argument list passed to ``subprocess.run`` (no shell).
        timeout: Seconds to wait before giving up.

    Returns:
        The captured stdout bytes, or ``b''`` on failure.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=timeout)
    except Exception:
        # Deliberately not a bare ``except:`` so that Ctrl-C (KeyboardInterrupt)
        # and SystemExit still propagate during a long-running recovery.
        return b''
    return proc.stdout
|
||||
|
||||
# ── Phase 1: get all allocated inodes via ils ─────────────────────────────────
|
||||
def get_all_inodes():
    """Enumerate allocated inodes in the intact block groups using ``ils -e``.

    Lines that cannot be parsed (including the ils header lines) are skipped.
    An inode is kept only if it is marked allocated, its number is above 11,
    and its block group is at least ``MIN_GOOD_GROUP``.

    Returns:
        dict mapping inode number -> ``{'type': t, 'size': n, 'mtime': n}``
        where ``t`` is ``'d'`` (directory), ``'f'`` (regular file),
        ``'l'`` (symlink) or ``'o'`` (anything else).
    """
    print('Running ils to enumerate all allocated inodes...')
    print('(This may take 30-60 minutes for a 4.4TB filesystem)')
    stdout, _ = run(['ils', '-e', DEVICE], timeout=7200)

    # File-type bits (mode & 0o170000) -> one-letter type code.
    type_codes = {0o040000: 'd', 0o100000: 'f', 0o120000: 'l'}

    inodes = {}  # inode -> {'type': 'f'/'d'/'l'/'o', 'size': n, 'mtime': n}
    for line in stdout.splitlines():
        if line.startswith('|') or not line.strip():
            continue

        # ils -e format:
        # inode|alloc|uid|gid|mtime|atime|ctime|dtime|mode|nlink|size|...
        fields = line.split('|')
        try:
            ino = int(fields[0])
            alloc = fields[1]  # 'a' = allocated, 'f' = free
            mtime = int(fields[4]) if len(fields) > 4 else 0
            # The ils man page documents st_mode as *octal*. Parsing it as
            # decimal mis-classifies directories: decimal 40755 masks to
            # 0o110000, which is neither a directory nor a regular file.
            # NOTE(review): confirm on the installed TSK version that st_mode
            # carries the file-type bits and not only the permission bits.
            mode = int(fields[8], 8) if len(fields) > 8 else 0
            size = int(fields[10]) if len(fields) > 10 else 0
        except (ValueError, IndexError):
            continue

        if alloc != 'a':
            continue
        if ino <= 11:
            continue
        if (ino - 1) // IPG < MIN_GOOD_GROUP:
            continue

        inodes[ino] = {
            'type': type_codes.get(mode & 0o170000, 'o'),
            'size': size,
            'mtime': mtime,
        }

    print(f'Found {len(inodes)} allocated inodes in intact groups')
    dirs = sum(1 for v in inodes.values() if v['type'] == 'd')
    files = sum(1 for v in inodes.values() if v['type'] == 'f')
    print(f' Directories: {dirs}')
    print(f' Files: {files}')
    return inodes
|
||||
|
||||
# ── Phase 2: build directory tree using fls ───────────────────────────────────
|
||||
def build_tree(dir_inodes):
    """List every directory inode with ``fls`` and record the tree structure.

    Args:
        dir_inodes: Iterable (with ``len``) of directory inode numbers.

    Returns:
        A tuple ``(inode_path, inode_children)``:

        * ``inode_path`` maps a directory inode to its parent inode, taken
          from that directory's ``..`` entry. Directories whose listing had no
          ``..`` entry are absent.
        * ``inode_children`` maps a directory inode to a list of
          ``(child_inode, name, type_char)`` tuples, where ``type_char`` is the
          first character of the fls type column.
    """
    print(f'\nBuilding directory tree from {len(dir_inodes)} directory inodes...')

    # inode -> parent_inode
    inode_path = {}
    # inode -> [(child_inode, name, type)]
    inode_children = collections.defaultdict(list)

    unlisted = 0
    for processed, dir_ino in enumerate(dir_inodes, 1):
        # -a: fls hides the "." and ".." entries by default. Without it the
        # ".." parent pointer is never seen and no path can be resolved.
        stdout, _ = run(['fls', '-a', DEVICE, str(dir_ino)], timeout=30)
        if not stdout:
            unlisted += 1
        parent_ino = None

        for line in stdout.splitlines():
            parts = line.split(None, 2)
            if len(parts) < 3:
                continue
            type_str, ino_field, name = parts
            try:
                ino = int(ino_field.rstrip(':').lstrip('*'))
            except ValueError:
                # Second column is not an inode number. Presumably this is
                # what drops fls's "*"-flagged deleted entries — confirm.
                continue
            name = name.strip()

            if name == '..':
                parent_ino = ino
                continue
            if name == '.':
                continue

            inode_children[dir_ino].append((ino, name, type_str[0]))

        if parent_ino is not None:
            inode_path[dir_ino] = parent_ino

        if processed % 1000 == 0:
            print(f' Processed {processed}/{len(dir_inodes)} directories...',
                  flush=True)

    if unlisted:
        # run() hides timeouts and tool failures; surface them at least once.
        print(f' WARNING: fls produced no listing for {unlisted} directories',
              flush=True)

    return inode_path, inode_children
|
||||
|
||||
# ── Phase 3: resolve paths ────────────────────────────────────────────────────
|
||||
def resolve_paths(inode_path, inode_children, all_inodes):
    """Turn parent pointers into output-relative paths for directories.

    Each directory is walked up through ``inode_path`` until a chain root is
    reached. A directory is a chain root, and gets the path
    ``orphans/dir_<inode>``, when:

    * its parent is unknown or is itself,
    * its parent lies in a block group below ``MIN_GOOD_GROUP``, or
    * following its parent would revisit a directory already on the current
      walk (a cycle in the parent pointers).

    Every other directory is placed below its parent, using the name the
    parent's listing gives it, or ``inode_<inode>`` if the parent does not
    list it.

    The walk is iterative and detects cycles explicitly, so arbitrarily deep
    trees resolve in full and the result does not depend on iteration order.

    Args:
        inode_path: dict of directory inode -> parent inode.
        inode_children: dict of directory inode ->
            ``[(child_inode, name, type_char), ...]``.
        all_inodes: Unused; kept for interface compatibility.

    Returns:
        dict of inode -> relative path string. Parents that are referenced but
        are not keys of ``inode_path`` also get an ``orphans/dir_<inode>``
        entry.
    """
    print('\nResolving full paths...')

    # inode -> full path string
    resolved = {}
    # parent inode -> {child directory inode: name}, built on first use so a
    # directory with many sub-directories is scanned once, not once per child.
    names_by_parent = {}

    def entry_name(parent, ino):
        names = names_by_parent.get(parent)
        if names is None:
            names = {}
            for child_ino, child_name, _ in inode_children.get(parent, ()):
                if child_ino in inode_path:
                    # First listing wins, as with a front-to-back scan.
                    names.setdefault(child_ino, child_name)
            names_by_parent[parent] = names
        return names.get(ino, f'inode_{ino}')

    for start in inode_path:
        pending = []      # unresolved inodes on this walk, deepest first
        on_walk = set()   # same inodes, for O(1) cycle checks
        ino = start
        while ino not in resolved:
            parent = inode_path.get(ino)
            is_chain_root = (
                parent is None
                or parent == ino
                or (parent - 1) // IPG < MIN_GOOD_GROUP  # parent inode is lost
                or parent in on_walk                      # parent-pointer cycle
            )
            if is_chain_root:
                resolved[ino] = f'orphans/dir_{ino}'
                break
            pending.append(ino)
            on_walk.add(ino)
            ino = parent

        # Unwind from the resolved ancestor back down to ``start``.
        for child in reversed(pending):
            parent = inode_path[child]
            resolved[child] = os.path.join(resolved[parent],
                                           entry_name(parent, child))

    return resolved
|
||||
|
||||
# ── Phase 4: extract ──────────────────────────────────────────────────────────
|
||||
def _icat_to_file(ino, outpath, timeout=600):
    """Stream inode *ino* from DEVICE into *outpath* with icat; return its exit status."""
    with open(outpath, 'wb') as f:
        proc = subprocess.run(
            ['icat', DEVICE, str(ino)],
            stdout=f, stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
    return proc.returncode


def extract_all(resolved_dirs, inode_children, all_inodes):
    """Extract file contents and symlinks into ``OUTDIR``.

    Pass 1 walks every resolved directory, creates it under ``OUTDIR`` and
    extracts its regular-file (``'r'``) and symlink (``'l'``) entries.
    Pass 2 extracts every non-empty regular-file inode from ``all_inodes``
    that pass 1 did not extract, into ``OUTDIR/orphans/files/<inode>``.

    An entry counts as OK only if icat exits with status 0. If icat fails, the
    bytes it wrote are left on disk but the entry counts as an error. Errors
    on individual entries never abort the run.

    Args:
        resolved_dirs: dict of directory inode -> path relative to ``OUTDIR``.
        inode_children: dict of directory inode ->
            ``[(child_inode, name, type_char), ...]``.
        all_inodes: dict of inode -> info dict with ``'type'`` and ``'size'``.

    Returns:
        dict with counters ``'ok'``, ``'err'`` and ``'bytes'``.
    """
    print('\nExtracting files...')
    stats = {'ok': 0, 'err': 0, 'bytes': 0}
    extracted = set()

    # Extract files reachable from directory tree
    for dir_ino, dir_path in resolved_dirs.items():
        abs_dir = os.path.join(OUTDIR, dir_path)
        os.makedirs(abs_dir, exist_ok=True)

        for child_ino, name, etype in inode_children.get(dir_ino, []):
            if child_ino in extracted:
                continue
            outpath = os.path.join(abs_dir, name)

            if etype == 'r':
                try:
                    status = _icat_to_file(child_ino, outpath)
                    size = os.path.getsize(outpath)
                except Exception:
                    stats['err'] += 1
                    continue
                if status != 0:
                    # icat reported a failure: do not count it as recovered.
                    stats['err'] += 1
                    continue
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(child_ino)
                if stats['ok'] % 100 == 0:
                    print(f' {stats["ok"]} files extracted, '
                          f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)

            elif etype == 'l':
                try:
                    r = subprocess.run(
                        ['icat', DEVICE, str(child_ino)],
                        capture_output=True, timeout=10
                    )
                    # The inode's content is used as the link target.
                    target = r.stdout.decode('utf-8', errors='replace').strip()
                    if r.returncode != 0:
                        stats['err'] += 1
                    elif target:
                        if os.path.lexists(outpath):
                            os.remove(outpath)
                        os.symlink(target, outpath)
                        extracted.add(child_ino)
                        stats['ok'] += 1
                except Exception:
                    stats['err'] += 1

    # Extract orphaned files (allocated but not in any directory)
    print('\nExtracting orphaned files...')
    orphan_dir = os.path.join(OUTDIR, 'orphans', 'files')
    os.makedirs(orphan_dir, exist_ok=True)

    for ino, info in all_inodes.items():
        if ino in extracted:
            continue
        if info['type'] != 'f':
            continue
        if info['size'] == 0:
            continue

        outpath = os.path.join(orphan_dir, str(ino))
        try:
            status = _icat_to_file(ino, outpath)
            size = os.path.getsize(outpath)
            if size == 0:
                # Nothing was recovered; do not leave an empty placeholder.
                os.remove(outpath)
        except Exception:
            stats['err'] += 1
            continue

        if status != 0:
            stats['err'] += 1
        elif size > 0:
            stats['ok'] += 1
            stats['bytes'] += size
            extracted.add(ino)

    return stats
|
||||
|
||||
def main():
    """Run the four recovery phases in order and print a summary.

    Exits with status 1 if inode enumeration returns nothing.
    """
    os.makedirs(OUTDIR, exist_ok=True)
    for banner_line in (f'Device : {DEVICE}', f'Output : {OUTDIR}'):
        print(banner_line)
    print()

    # Phase 1: enumerate all inodes
    inode_table = get_all_inodes()
    if not inode_table:
        print('ERROR: ils returned no inodes - is NBD server running?')
        sys.exit(1)

    directory_inodes = []
    for number, details in inode_table.items():
        if details['type'] == 'd':
            directory_inodes.append(number)

    # Phase 2: build tree
    parents, children = build_tree(directory_inodes)

    # Phase 3: resolve paths
    dir_paths = resolve_paths(parents, children, inode_table)

    orphan_count = 0
    intact_count = 0
    for rel_path in dir_paths.values():
        if rel_path.startswith('orphans'):
            orphan_count += 1
        else:
            intact_count += 1
    print(f'Directories with resolved paths: {intact_count}')
    print(f'Orphaned directories: {orphan_count}')

    # Phase 4: extract
    totals = extract_all(dir_paths, children, inode_table)

    print()
    print('=== COMPLETE ===')
    print(f'Files OK: {totals["ok"]}')
    print(f'Files ERR: {totals["err"]}')
    print(f'Total data: {totals["bytes"]/1024**3:.2f} GB')
    print(f'Output: {OUTDIR}')


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user