#!/usr/bin/env python3
"""
Full filesystem extraction using TSK ils + fls + icat.

Strategy:
  1. ils  - get every allocated inode
  2. For each inode, determine if file or directory
  3. Build directory tree bottom-up using parent pointers (..)
  4. Extract everything, place orphans in /orphans/
"""

import collections
import os
import struct
import subprocess
import sys

DEVICE = '/dev/nbd0'
OUTDIR = '/mnt/recovered'
IPG = 8192            # divisor used to map an inode number to its group
MIN_GOOD_GROUP = 13   # groups 0-12 are zeroed

# File-type bits of st_mode -> single-letter type used throughout the script.
_MODE_TYPES = {0o040000: 'd', 0o100000: 'f', 0o120000: 'l'}


def run(cmd, timeout=600):
    """Run *cmd* and return ``(stdout, stderr)`` as text.

    Never raises: a timeout yields ``('', 'timeout')`` and any other failure
    yields ``('', str(error))``.
    """
    try:
        # surrogateescape: a single filename that is not valid in the locale
        # encoding must not make the whole listing fail to decode; the
        # escaped bytes round-trip back into the output filename on POSIX.
        r = subprocess.run(cmd, capture_output=True, text=True,
                           errors='surrogateescape', timeout=timeout)
        return r.stdout, r.stderr
    except subprocess.TimeoutExpired:
        return '', 'timeout'
    except Exception as e:
        return '', str(e)


def run_binary(cmd, timeout=600):
    """Run *cmd* and return its raw stdout bytes (``b''`` on any failure)."""
    try:
        r = subprocess.run(cmd, capture_output=True, timeout=timeout)
        return r.stdout
    except Exception:
        # Best effort, but let KeyboardInterrupt / SystemExit propagate.
        return b''


def _mode_to_type(mode):
    """Map an st_mode value to 'd', 'f', 'l', or 'o' (anything else)."""
    return _MODE_TYPES.get(mode & 0o170000, 'o')


# ── Phase 1: get all allocated inodes via ils ─────────────────────────────────

def get_all_inodes():
    """Enumerate allocated inodes in the intact groups using ``ils -e``.

    Returns:
        dict mapping inode number -> ``{'type': 'd'/'f'/'l'/'o', 'size': n,
        'mtime': n}``. Inodes that are not allocated, are numbered <= 11, or
        fall in a group below ``MIN_GOOD_GROUP`` are left out.
    """
    print('Running ils to enumerate all allocated inodes...')
    print('(This may take 30-60 minutes for a 4.4TB filesystem)')

    stdout, stderr = run(['ils', '-e', DEVICE], timeout=7200)
    if not stdout and stderr:
        # Distinguish "ils failed / timed out" from "filesystem is empty".
        print(f'ils produced no output: {stderr.strip()}')

    inodes = {}  # inode -> {'type': 'f'/'d', 'size': n, 'mtime': n}
    for line in stdout.splitlines():
        if line.startswith('|') or not line.strip():
            continue
        try:
            # ils -e format:
            # inode|alloc|uid|gid|mtime|atime|ctime|dtime|mode|nlink|size|...
            # Header lines fail the int() conversions and are skipped below.
            # NOTE(review): this assumes field 8 is a decimal st_mode that
            # includes the file-type bits - confirm against the output of the
            # installed ils version.
            fields = line.split('|')
            ino = int(fields[0])
            alloc = fields[1]  # 'a' = allocated, 'f' = free
            mode = int(fields[8]) if len(fields) > 8 else 0
            size = int(fields[10]) if len(fields) > 10 else 0
            mtime = int(fields[4]) if len(fields) > 4 else 0
        except (ValueError, IndexError):
            continue

        if alloc != 'a':
            continue
        if ino <= 11:
            continue
        if (ino - 1) // IPG < MIN_GOOD_GROUP:
            continue

        inodes[ino] = {'type': _mode_to_type(mode), 'size': size,
                       'mtime': mtime}

    print(f'Found {len(inodes)} allocated inodes in intact groups')
    dirs = sum(1 for v in inodes.values() if v['type'] == 'd')
    files = sum(1 for v in inodes.values() if v['type'] == 'f')
    print(f'  Directories: {dirs}')
    print(f'  Files: {files}')
    return inodes


# ── Phase 2: build directory tree using fls ───────────────────────────────────

def build_tree(dir_inodes):
    """Run fls on every directory inode and record parents and children.

    Returns:
        ``(inode_path, inode_children)`` where ``inode_path`` maps a directory
        inode to the inode its ``..`` entry points at, and ``inode_children``
        maps a directory inode to a list of ``(child_inode, name, type)``
        tuples (``type`` is the first character of the fls type column).
    """
    print(f'\nBuilding directory tree from {len(dir_inodes)} directory inodes...')

    # dir inode -> parent inode (taken from the '..' entry)
    inode_path = {}
    # dir inode -> [(child_inode, name, type)]
    inode_children = collections.defaultdict(list)

    processed = 0
    for dir_ino in dir_inodes:
        # -a is required: by default fls omits the '.' and '..' entries, and
        # without '..' no parent pointer is ever found.
        stdout, _ = run(['fls', '-a', DEVICE, str(dir_ino)], timeout=30)
        parent_ino = None

        for line in stdout.splitlines():
            try:
                parts = line.split(None, 2)
                if len(parts) < 3:
                    continue
                type_str = parts[0]
                ino_str = parts[1].rstrip(':').lstrip('*')
                name = parts[2].strip()
                # Lines whose second column is not a plain inode number
                # raise ValueError here and are skipped.
                ino = int(ino_str)
                etype = type_str[0]
            except (ValueError, IndexError):
                continue

            if name == '..':
                parent_ino = ino
                continue
            if name == '.':
                continue
            inode_children[dir_ino].append((ino, name, etype))

        if parent_ino is not None:
            inode_path[dir_ino] = parent_ino

        processed += 1
        if processed % 1000 == 0:
            print(f'  Processed {processed}/{len(dir_inodes)} directories...',
                  flush=True)

    return inode_path, inode_children


# ── Phase 3: resolve paths ────────────────────────────────────────────────────

def resolve_paths(inode_path, inode_children, all_inodes):
    """Walk parent pointers to build a relative path for each directory.

    Directories whose parent is unknown, is the directory itself, or lies in
    a lost group (below ``MIN_GOOD_GROUP``) become ``orphans/dir_<inode>``
    roots. ``all_inodes`` is accepted for interface compatibility and is not
    consulted.

    Returns:
        dict mapping directory inode -> path relative to the output root.
    """
    print('\nResolving full paths...')

    # inode -> resolved relative path
    resolved = {}

    def get_path(ino, depth=0):
        if depth > 50:  # cycle protection
            return None
        if ino in resolved:
            return resolved[ino]

        parent = inode_path.get(ino)
        if parent is None or parent == ino:
            # Root or unknown parent
            path = f'orphans/dir_{ino}'
        elif (parent - 1) // IPG < MIN_GOOD_GROUP:
            # Parent is in a lost group, so its listing (and therefore this
            # directory's name) is gone: this is an orphan root.
            path = f'orphans/dir_{ino}'
        else:
            parent_path = get_path(parent, depth + 1)
            if parent_path is None:
                path = f'orphans/dir_{ino}'
            else:
                # Find our name in the parent's children; fall back to a
                # synthetic name when the parent does not list us.
                name = f'inode_{ino}'
                for child_ino, child_name, _ in inode_children.get(parent, []):
                    if child_ino == ino:
                        name = child_name
                        break
                path = os.path.join(parent_path, name)

        resolved[ino] = path
        return path

    for ino in inode_path:
        get_path(ino)

    return resolved


# ── Phase 4: extract ──────────────────────────────────────────────────────────

def _icat_to_file(ino, outpath, timeout=600):
    """Stream inode *ino* through icat into *outpath*; return bytes written."""
    with open(outpath, 'wb') as f:
        subprocess.run(
            ['icat', DEVICE, str(ino)],
            stdout=f, stderr=subprocess.DEVNULL, timeout=timeout
        )
    return os.path.getsize(outpath)


def extract_all(resolved_dirs, inode_children, all_inodes):
    """Extract files and symlinks into ``OUTDIR``.

    First extracts every regular file ('r') and symlink ('l') listed under a
    resolved directory, then dumps remaining non-empty regular-file inodes
    into ``orphans/files/<inode>``. Each inode is extracted at most once.

    Returns:
        dict with counters ``'ok'``, ``'err'`` and ``'bytes'``.
    """
    print(f'\nExtracting files...')

    stats = {'ok': 0, 'err': 0, 'bytes': 0}
    extracted = set()

    # Extract files reachable from directory tree
    for dir_ino, dir_path in resolved_dirs.items():
        abs_dir = os.path.join(OUTDIR, dir_path)
        os.makedirs(abs_dir, exist_ok=True)

        for child_ino, name, etype in inode_children.get(dir_ino, []):
            if child_ino in extracted:
                continue

            outpath = os.path.join(abs_dir, name)

            if etype == 'r':
                try:
                    size = _icat_to_file(child_ino, outpath)
                    stats['ok'] += 1
                    stats['bytes'] += size
                    extracted.add(child_ino)
                    if stats['ok'] % 100 == 0:
                        print(f'  {stats["ok"]} files extracted, '
                              f'{stats["bytes"]/1024**3:.2f}GB...', flush=True)
                except Exception:
                    stats['err'] += 1

            elif etype == 'l':
                try:
                    r = subprocess.run(
                        ['icat', DEVICE, str(child_ino)],
                        capture_output=True, timeout=10
                    )
                    target = r.stdout.decode('utf-8', errors='replace').strip()
                    if target:
                        if os.path.lexists(outpath):
                            os.remove(outpath)
                        os.symlink(target, outpath)
                        extracted.add(child_ino)
                        stats['ok'] += 1
                except Exception:
                    # Not a bare except: Ctrl-C must still abort the run.
                    stats['err'] += 1

    # Extract orphaned files (allocated but not in any directory)
    print(f'\nExtracting orphaned files...')
    orphan_dir = os.path.join(OUTDIR, 'orphans', 'files')
    os.makedirs(orphan_dir, exist_ok=True)

    for ino, info in all_inodes.items():
        if ino in extracted:
            continue
        if info['type'] != 'f':
            continue
        if info['size'] == 0:
            continue

        outpath = os.path.join(orphan_dir, str(ino))
        try:
            size = _icat_to_file(ino, outpath)
            if size > 0:
                stats['ok'] += 1
                stats['bytes'] += size
                extracted.add(ino)
            else:
                # icat produced nothing; do not leave an empty placeholder.
                os.remove(outpath)
        except Exception:
            stats['err'] += 1

    return stats


def main():
    """Run all four phases against ``DEVICE`` and print a summary."""
    os.makedirs(OUTDIR, exist_ok=True)

    print(f'Device : {DEVICE}')
    print(f'Output : {OUTDIR}')
    print()

    # Phase 1: enumerate all inodes
    all_inodes = get_all_inodes()
    if not all_inodes:
        print('ERROR: ils returned no inodes - is NBD server running?')
        sys.exit(1)

    dir_inodes = [ino for ino, info in all_inodes.items()
                  if info['type'] == 'd']

    # Phase 2: build tree
    inode_path, inode_children = build_tree(dir_inodes)

    # Phase 3: resolve paths
    resolved_dirs = resolve_paths(inode_path, inode_children, all_inodes)

    intact = sum(1 for p in resolved_dirs.values()
                 if not p.startswith('orphans'))
    orphaned = sum(1 for p in resolved_dirs.values()
                   if p.startswith('orphans'))
    print(f'Directories with resolved paths: {intact}')
    print(f'Orphaned directories: {orphaned}')

    # Phase 4: extract
    stats = extract_all(resolved_dirs, inode_children, all_inodes)

    print()
    print('=== COMPLETE ===')
    print(f'Files OK:   {stats["ok"]}')
    print(f'Files ERR:  {stats["err"]}')
    print(f'Total data: {stats["bytes"]/1024**3:.2f} GB')
    print(f'Output:     {OUTDIR}')


if __name__ == '__main__':
    main()