307 lines
10 KiB
Python
307 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Full filesystem extraction using TSK ils + fls + icat.
|
|
|
|
Strategy:
|
|
1. ils - get every allocated inode
|
|
2. For each inode, determine if file or directory
|
|
3. Build directory tree bottom-up using parent pointers (..)
|
|
4. Extract everything, place orphans in /orphans/<inode>
|
|
"""
|
|
import subprocess, os, sys, struct, collections
|
|
|
|
# Block device holding the damaged filesystem; passed to ils, fls and icat.
DEVICE = '/dev/nbd0'
# Directory under which every recovered file and directory is written.
OUTDIR = '/mnt/recovered'
# Divisor that maps an inode number to its group index: (ino - 1) // IPG.
# Presumably the filesystem's inodes-per-group value -- confirm against the
# superblock before reusing this script on another image.
IPG = 8192
# Lowest group index treated as intact; inodes in lower groups are skipped.
MIN_GOOD_GROUP = 13 # groups 0-12 are zeroed
|
|
|
|
def run(cmd, timeout=600):
    """Run *cmd* and return its decoded output as ``(stdout, stderr)``.

    This is a best-effort wrapper: ordinary failures are reported through the
    return value instead of an exception, so one bad inode cannot abort a
    long recovery run.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell involved).
        timeout: Seconds to wait before giving up on the child process.

    Returns:
        ``(stdout, stderr)`` as text. On timeout the result is
        ``('', 'timeout')``; if the command cannot be run at all the result
        is ``('', str(error))``. The exit status is not inspected.
    """
    try:
        r = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # File names on a damaged filesystem are arbitrary bytes. With the
            # default strict handler a single undecodable byte raises
            # UnicodeDecodeError and the whole listing is thrown away.
            # surrogateescape keeps such bytes round-trippable, so the name
            # can still be handed back to the OS when the file is created.
            errors='surrogateescape',
            timeout=timeout,
        )
        return r.stdout, r.stderr
    except subprocess.TimeoutExpired:
        return '', 'timeout'
    except Exception as e:
        # Deliberately broad: callers treat "no output" as "skip this item".
        return '', str(e)
|
|
|
|
def run_binary(cmd, timeout=600):
    """Run *cmd* and return its raw stdout bytes.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell involved).
        timeout: Seconds to wait before giving up on the child process.

    Returns:
        The bytes the command wrote to stdout, or ``b''`` when the command
        could not be run or timed out. The exit status is not inspected.
    """
    try:
        r = subprocess.run(cmd, capture_output=True, timeout=timeout)
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt and SystemExit must
        # propagate so the operator can actually stop a long-running job.
        return b''
    return r.stdout
|
|
|
|
# ── Phase 1: get all allocated inodes via ils ─────────────────────────────────
|
|
def get_all_inodes():
    """Enumerate allocated inodes in the intact groups using ``ils -e``.

    Each output record is split on ``|`` and read positionally:
    field 0 = inode number, 1 = allocation flag, 4 = mtime, 8 = mode,
    10 = size. Records are dropped when they are not allocated (flag other
    than ``'a'``), when the inode number is 11 or lower, or when the inode
    falls in a group below ``MIN_GOOD_GROUP``. Lines that do not parse
    (including the header lines ils prints) are skipped.

    Returns:
        dict mapping inode number to ``{'type', 'size', 'mtime'}`` where
        ``type`` is ``'d'`` (directory), ``'f'`` (regular file), ``'l'``
        (symlink) or ``'o'`` (anything else).
    """
    print('Running ils to enumerate all allocated inodes...')
    print('(This may take 30-60 minutes for a 4.4TB filesystem)')
    stdout, stderr = run(['ils', '-e', DEVICE], timeout=7200)
    if not stdout and stderr:
        # Surface the reason instead of silently returning an empty result.
        print(f'ils produced no output: {stderr.strip()}')

    # File-type bits of a Unix mode -> one-letter type code.
    type_by_format = {0o040000: 'd', 0o100000: 'f', 0o120000: 'l'}

    inodes = {}  # inode -> {'type': 'f'/'d'/'l'/'o', 'size': n, 'mtime': n}
    for line in stdout.splitlines():
        if line.startswith('|') or not line.strip():
            continue

        # ils -e format:
        # inode|alloc|uid|gid|mtime|atime|ctime|crtime|mode|nlink|size|...
        fields = line.split('|')
        try:
            ino = int(fields[0])
            alloc = fields[1]  # 'a' = allocated, 'f' = free
            # ils prints st_mode in octal, so it must be parsed base 8;
            # reading "40755" as decimal never matches the directory mask.
            # NOTE(review): confirm the installed TSK version includes the
            # file-type bits in this field.
            mode = int(fields[8], 8) if len(fields) > 8 else 0
            size = int(fields[10]) if len(fields) > 10 else 0
            mtime = int(fields[4]) if len(fields) > 4 else 0
        except (ValueError, IndexError):
            continue

        if alloc != 'a':
            continue
        if ino <= 11:
            continue
        if (ino - 1) // IPG < MIN_GOOD_GROUP:
            continue

        inodes[ino] = {
            'type': type_by_format.get(mode & 0o170000, 'o'),
            'size': size,
            'mtime': mtime,
        }

    print(f'Found {len(inodes)} allocated inodes in intact groups')
    dirs = sum(1 for v in inodes.values() if v['type'] == 'd')
    files = sum(1 for v in inodes.values() if v['type'] == 'f')
    print(f' Directories: {dirs}')
    print(f' Files: {files}')
    return inodes
|
|
|
|
# ── Phase 2: build directory tree using fls ───────────────────────────────────
|
|
def build_tree(dir_inodes):
    """
    List every directory with fls and record its parent and its children.

    Args:
        dir_inodes: Sized iterable of directory inode numbers.

    Returns:
        ``(inode_path, inode_children)`` where

        * ``inode_path`` maps a directory inode to the inode its ``..``
          entry points to (present only when a ``..`` entry was listed);
        * ``inode_children`` maps a directory inode to a list of
          ``(child_inode, name, type)`` tuples, ``type`` being the first
          character of the fls type column (e.g. ``'r'``, ``'d'``, ``'l'``).
    """
    print(f'\nBuilding directory tree from {len(dir_inodes)} directory inodes...')

    # inode -> parent_inode
    inode_path = {}
    # inode -> [(child_inode, name, type)]
    inode_children = collections.defaultdict(list)

    for processed, dir_ino in enumerate(dir_inodes, 1):
        # -a is required: by default fls does not print the "." and ".."
        # entries, and without ".." no parent pointer can be recorded.
        stdout, _ = run(['fls', '-a', DEVICE, str(dir_ino)], timeout=30)
        parent_ino = None

        for line in stdout.splitlines():
            parts = line.split(None, 2)
            if len(parts) < 3:
                continue
            type_str, ino_field, raw_name = parts
            try:
                # Entries whose inode field is not a plain number (for
                # example the "*" marker fls prints for deleted entries)
                # fail to parse here and are skipped.
                ino = int(ino_field.rstrip(':').lstrip('*'))
            except ValueError:
                continue
            name = raw_name.strip()

            if name == '..':
                parent_ino = ino
                continue
            if name == '.':
                continue

            inode_children[dir_ino].append((ino, name, type_str[0]))

        if parent_ino is not None:
            inode_path[dir_ino] = parent_ino

        if processed % 1000 == 0:
            print(f' Processed {processed}/{len(dir_inodes)} directories...',
                  flush=True)

    return inode_path, inode_children
|
|
|
|
# ── Phase 3: resolve paths ────────────────────────────────────────────────────
|
|
def resolve_paths(inode_path, inode_children, all_inodes,
                  ipg=None, min_good_group=None):
    """
    Walk parent pointers to build a relative path for each directory.

    A directory becomes an orphan root (``orphans/dir_<inode>``) when its
    parent is unknown, is itself, or lies in a group below
    ``min_good_group``. Otherwise its path is its parent's path joined with
    the name under which the parent lists it, or ``inode_<inode>`` when the
    parent does not list it.

    Args:
        inode_path: dict mapping directory inode -> parent inode.
        inode_children: dict mapping directory inode ->
            list of ``(child_inode, name, type)``.
        all_inodes: Accepted for interface compatibility; not used.
        ipg: Inodes per group. Defaults to the module-level ``IPG``.
        min_good_group: Lowest intact group index. Defaults to the
            module-level ``MIN_GOOD_GROUP``.

    Returns:
        dict mapping inode -> relative path string. Parents that were
        reached during the walk but are absent from ``inode_path`` are
        included as orphan roots.
    """
    print('\nResolving full paths...')

    if ipg is None:
        ipg = IPG
    if min_good_group is None:
        min_good_group = MIN_GOOD_GROUP

    # Parent chains longer than this are cut off (cycle protection); the
    # directory at the cut becomes an orphan root.
    max_depth = 50

    # child directory inode -> its name in its parent's listing.
    # Built once so each lookup is O(1) instead of rescanning the parent's
    # whole child list; setdefault keeps the first matching entry.
    name_in_parent = {}
    for parent, children in inode_children.items():
        for child_ino, child_name, _ in children:
            if inode_path.get(child_ino) == parent:
                name_in_parent.setdefault(child_ino, child_name)

    # inode -> full path string
    resolved = {}

    def get_path(ino, depth=0):
        if depth > max_depth:
            return None
        if ino in resolved:
            return resolved[ino]

        orphan_root = f'orphans/dir_{ino}'
        parent = inode_path.get(ino)
        if (parent is None or parent == ino
                or (parent - 1) // ipg < min_good_group):
            # Unknown parent, self-parent, or parent in a lost group: the
            # real name is unrecoverable, so this directory roots an orphan
            # subtree.
            resolved[ino] = orphan_root
            return orphan_root

        parent_path = get_path(parent, depth + 1)
        if parent_path is None:
            path = orphan_root
        else:
            name = name_in_parent.get(ino, f'inode_{ino}')
            path = os.path.join(parent_path, name)

        resolved[ino] = path
        return path

    for ino in inode_path:
        get_path(ino)

    return resolved
|
|
|
|
# ── Phase 4: extract ──────────────────────────────────────────────────────────
|
|
def _icat_to_file(ino, outpath, timeout=600):
    """Stream inode *ino* from DEVICE into *outpath* via icat; return its size."""
    with open(outpath, 'wb') as f:
        subprocess.run(
            ['icat', DEVICE, str(ino)],
            stdout=f, stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
    return os.path.getsize(outpath)


def extract_all(resolved_dirs, inode_children, all_inodes):
    """
    Extract file contents with icat, first by directory tree, then orphans.

    Pass 1 creates every resolved directory under OUTDIR and extracts its
    children: entries of type ``'r'`` are written as files, entries of type
    ``'l'`` are recreated as symlinks whose target is the icat output.
    An inode already extracted under one name is not extracted again.

    Pass 2 writes every remaining non-empty regular file from *all_inodes*
    to ``OUTDIR/orphans/files/<inode>``; outputs that come back empty are
    removed again.

    NOTE: icat's exit status is not inspected; in pass 1 a file counts as
    OK as soon as the call returns without raising.

    Args:
        resolved_dirs: dict mapping directory inode -> relative path.
        inode_children: dict mapping directory inode ->
            list of ``(child_inode, name, type)``.
        all_inodes: dict mapping inode -> ``{'type', 'size', ...}``.

    Returns:
        dict with counters ``'ok'``, ``'err'`` and ``'bytes'``.
    """
    print('\nExtracting files...')
    stats = {'ok': 0, 'err': 0, 'bytes': 0}
    extracted = set()

    # Pass 1: files reachable from the directory tree
    for dir_ino, dir_path in resolved_dirs.items():
        abs_dir = os.path.join(OUTDIR, dir_path)
        try:
            os.makedirs(abs_dir, exist_ok=True)
        except OSError as e:
            # One uncreatable directory (name clash, name too long, ...)
            # must not abort a multi-hour run; its files fall through to
            # the orphan pass below.
            print(f' cannot create directory {dir_path!r}: {e}', flush=True)
            stats['err'] += 1
            continue

        for child_ino, name, etype in inode_children.get(dir_ino, []):
            if child_ino in extracted:
                continue
            outpath = os.path.join(abs_dir, name)

            if etype == 'r':
                try:
                    size = _icat_to_file(child_ino, outpath)
                except Exception:
                    # Exception, not a bare except: Ctrl-C must stop the
                    # run rather than be counted as a failed file.
                    stats['err'] += 1
                    continue
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(child_ino)
                if stats['ok'] % 100 == 0:
                    print(f' {stats["ok"]} files extracted, '
                          f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)

            elif etype == 'l':
                try:
                    r = subprocess.run(
                        ['icat', DEVICE, str(child_ino)],
                        capture_output=True, timeout=10
                    )
                    target = r.stdout.decode('utf-8', errors='replace').strip()
                    if target:
                        # Replace whatever is already at this path.
                        if os.path.lexists(outpath):
                            os.remove(outpath)
                        os.symlink(target, outpath)
                        extracted.add(child_ino)
                        stats['ok'] += 1
                except Exception:
                    stats['err'] += 1

    # Pass 2: orphaned files (allocated but not extracted from any directory)
    print('\nExtracting orphaned files...')
    orphan_dir = os.path.join(OUTDIR, 'orphans', 'files')
    os.makedirs(orphan_dir, exist_ok=True)

    for ino, info in all_inodes.items():
        if ino in extracted or info['type'] != 'f' or info['size'] == 0:
            continue

        outpath = os.path.join(orphan_dir, str(ino))
        try:
            size = _icat_to_file(ino, outpath)
            if size > 0:
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(ino)
            else:
                os.remove(outpath)
        except Exception:
            stats['err'] += 1

    return stats
|
|
|
|
def main():
    """Run the four recovery phases in order and print a final summary.

    Exits with status 1 when the inode enumeration returns nothing.
    """
    os.makedirs(OUTDIR, exist_ok=True)
    print(f'Device : {DEVICE}')
    print(f'Output : {OUTDIR}')
    print()

    # Phase 1: enumerate all inodes
    all_inodes = get_all_inodes()
    if not all_inodes:
        print('ERROR: ils returned no inodes - is NBD server running?')
        sys.exit(1)

    dir_inodes = []
    for number, details in all_inodes.items():
        if details['type'] == 'd':
            dir_inodes.append(number)

    # Phase 2: build tree
    inode_path, inode_children = build_tree(dir_inodes)

    # Phase 3: resolve paths
    resolved_dirs = resolve_paths(inode_path, inode_children, all_inodes)

    # Split the resolved directories into placed and orphaned in one pass.
    intact = 0
    orphaned = 0
    for rel_path in resolved_dirs.values():
        if rel_path.startswith('orphans'):
            orphaned += 1
        else:
            intact += 1
    print(f'Directories with resolved paths: {intact}')
    print(f'Orphaned directories: {orphaned}')

    # Phase 4: extract
    stats = extract_all(resolved_dirs, inode_children, all_inodes)

    total_gib = stats["bytes"] / 1024**3
    print()
    print('=== COMPLETE ===')
    print(f'Files OK: {stats["ok"]}')
    print(f'Files ERR: {stats["err"]}')
    print(f'Total data: {total_gib:.2f} GB')
    print(f'Output: {OUTDIR}')
|
|
|
|
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|