#!/usr/bin/env python3
"""
Stage 1 – Scan ext4 inode tables and persist everything to a SQLite database.

This is the slow stage that reads the raw device sequentially. Run it once;
subsequent pipeline stages read from the database and never touch the device.

The scan is resumable: already-scanned groups are skipped on re-run.

Usage:
    python3 scan_inodes.py [options]

Options:
    --device DEV        Block device or image file            [/dev/dm-0]
    --backup-sb BLOCK   Block number of the backup superblock [32768]
    --db PATH           Output SQLite database                [inodes.db]
    --zeroed-groups N   First N block groups are damaged/zeroed (skip scan,
                        mark parents in them as orphan triggers) [13]
    --start-group N     Start scanning from group N (override resume logic)
    --end-group N       Stop after group N (for partial scans / testing)
"""
|
||
import argparse, sys, time
|
||
import ext4lib
|
||
import ext4db
|
||
|
||
# Command-line defaults and scan tuning knobs.
DEFAULT_DEV = "/dev/dm-0"    # default for --device
DEFAULT_BACKUP_SB = 32768    # default for --backup-sb
COMMIT_INTERVAL = 20         # commit every N groups
|
||
|
||
|
||
def scan_group(f, sb, gdt_data, grp, db):
    """Scan one block group's inode table and save every non-empty inode.

    Args:
        f: Open binary file object for the device/image being scanned.
        sb: Superblock mapping; this function reads ``desc_size``,
            ``inode_size`` and ``inodes_per_group`` from it.
        gdt_data: Raw group-descriptor-table buffer handed to
            ``ext4lib.parse_gdt_entry``.
        grp: Index of the block group to scan.
        db: Database handle passed through to the ``ext4db`` helpers.

    Returns:
        Number of inodes saved for this group (0 when the descriptor yields
        an inode-table block of 0).

    The group is marked as scanned even when individual blocks or directory
    listings could not be read; those failures are reported on stderr so the
    scan stays best-effort without being silent.
    """
    desc_size = sb['desc_size']
    inode_table_block = ext4lib.parse_gdt_entry(
        gdt_data, grp * desc_size, desc_size)
    if inode_table_block == 0:
        # Nothing to read for this group; still record it so a re-run skips it.
        ext4db.mark_group_scanned(db, grp)
        return 0

    block_size = ext4lib.BLOCK
    inode_size = sb['inode_size']
    inodes_per_group = sb['inodes_per_group']
    inodes_per_block = block_size // inode_size
    # Round up so a partially filled final table block is still visited.
    num_inode_blocks = (inodes_per_group * inode_size + block_size - 1) // block_size

    found = 0
    for blk_off in range(num_inode_blocks):
        try:
            idata = ext4lib.read_at(
                f, (inode_table_block + blk_off) * block_size, block_size)
        except OSError as exc:
            # Keep going: one unreadable block must not abort the whole scan.
            print(f"warning: group {grp}: cannot read inode table block "
                  f"{inode_table_block + blk_off}: {exc}", file=sys.stderr)
            continue

        for slot_idx in range(inodes_per_block):
            slot_in_group = blk_off * inodes_per_block + slot_idx
            if slot_in_group >= inodes_per_group:
                # Slots past the group's inode count (last, partially used
                # block) would otherwise be numbered as the next group's inodes.
                break

            ino_off = slot_idx * inode_size
            abs_inum = grp * inodes_per_group + slot_in_group + 1  # inode numbers are 1-based

            inode = ext4lib.parse_inode_full(idata, ino_off, sb)
            if inode is None or inode['mode'] == 0:
                continue

            status = ext4lib.classify_inode(idata, ino_off)
            ext4db.save_inode(db, abs_inum, grp, inode, status)
            found += 1

            # For directories with links, also read dir entries from disk.
            if inode['type'] == ext4lib.ITYPE_DIR and inode['links'] > 0:
                try:
                    entries = ext4lib.read_dir_entries_raw(f, idata, ino_off)
                    for name, (child_inum, ftype) in entries.items():
                        ext4db.save_dir_entry(db, abs_inum, name, child_inum, ftype)
                except Exception as exc:
                    # Deliberately broad: directory data on a damaged volume can
                    # fail in arbitrary ways. The inode itself is already saved;
                    # report the loss of its entries instead of hiding it.
                    print(f"warning: inode {abs_inum}: cannot read directory "
                          f"entries: {exc!r}", file=sys.stderr)

    ext4db.mark_group_scanned(db, grp)
    return found
|
||
|
||
|
||
def main():
    """Command-line entry point: scan block groups and store inodes in SQLite.

    Parses the options, opens the database and the device, loads filesystem
    geometry via ``ext4lib.load_fs``, then calls ``scan_group`` for every
    group in the selected range that is not already recorded as scanned.
    Work is committed every ``COMMIT_INTERVAL`` groups and once more at the
    end; progress is printed every 100 groups.
    """
    parser = argparse.ArgumentParser(description='Scan ext4 inodes into SQLite DB (Stage 1)')
    parser.add_argument('--device', default=DEFAULT_DEV)
    parser.add_argument('--backup-sb', type=int, default=DEFAULT_BACKUP_SB)
    parser.add_argument('--db', default='inodes.db')
    parser.add_argument('--zeroed-groups', type=int, default=13,
                        help='First N groups are damaged; skip scan, flag parents there as orphans')
    parser.add_argument('--start-group', type=int, default=None,
                        help='Force start at this group (ignores resume state)')
    parser.add_argument('--end-group', type=int, default=None,
                        help='Stop after this group (inclusive)')
    args = parser.parse_args()

    db = ext4db.open_db(args.db)

    with open(args.device, 'rb') as f:
        sb, gdt_data, num_groups = ext4lib.load_fs(f, args.backup_sb)

        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{sb['inodes_per_group']} ino/grp, "
              f"inode_size={sb['inode_size']}, desc_size={sb['desc_size']}")
        # Avoid printing the nonsensical range "0–-1" when nothing is zeroed.
        if args.zeroed_groups > 0:
            zeroed_desc = f"0–{args.zeroed_groups - 1}"
        else:
            zeroed_desc = "none"
        print(f"Total groups: {num_groups} | zeroed groups: {zeroed_desc}")

        ext4db.save_fs_meta(db, sb, args.device, args.backup_sb, args.zeroed_groups)

        scanned = ext4db.get_scanned_groups(db)

        # --end-group is documented as "stop after group N", so N itself is
        # scanned; clamp so we never index past the last group descriptor.
        if args.end_group is not None:
            end = min(args.end_group + 1, num_groups)
        else:
            end = num_groups

        start = args.start_group if args.start_group is not None else args.zeroed_groups
        start = max(start, 0)  # a negative group index would produce bogus offsets

        # NOTE(review): the --start-group help says it "ignores resume state",
        # but already-scanned groups are still skipped below. Rescanning is only
        # safe if the ext4db save functions are idempotent — confirm before
        # changing this.

        total_found = 0
        groups_done = 0  # groups actually scanned in this run (skips excluded)
        t0 = time.monotonic()

        for grp in range(start, end):
            if grp in scanned:
                continue

            total_found += scan_group(f, sb, gdt_data, grp, db)
            groups_done += 1

            if grp % COMMIT_INTERVAL == 0:
                db.commit()

            if grp % 100 == 0:
                elapsed = time.monotonic() - t0
                # Rate is based on groups really scanned so a resumed run does
                # not report an inflated speed from skipped groups.
                rate = groups_done / elapsed if elapsed > 0 else 0
                eta = (end - grp - 1) / rate if rate > 0 else 0
                print(f" group {grp:6d}/{end} inodes={total_found:,} "
                      f"{rate:.1f} grp/s ETA {eta:.0f}s",
                      end='\r', flush=True)

    db.commit()

    print("\nScan complete.")
    ext4db.print_stats(db)
|
||
|
||
|
||
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|