Initial remote commit
This commit is contained in:
135
scan_inodes.py
Executable file
135
scan_inodes.py
Executable file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Stage 1 – Scan ext4 inode tables and persist everything to a SQLite database.
|
||||
|
||||
This is the slow stage that reads the raw device sequentially. Run it once;
|
||||
subsequent pipeline stages read from the database and never touch the device.
|
||||
|
||||
The scan is resumable: already-scanned groups are skipped on re-run.
|
||||
|
||||
Usage:
|
||||
python3 scan_inodes.py [options]
|
||||
|
||||
Options:
|
||||
--device DEV Block device or image file [/dev/dm-0]
|
||||
--backup-sb BLOCK Block number of the backup superblock [32768]
|
||||
--db PATH Output SQLite database [inodes.db]
|
||||
--zeroed-groups N First N block groups are damaged/zeroed (skip scan,
|
||||
mark parents in them as orphan triggers) [13]
|
||||
--start-group N Start scanning from group N instead of the first non-zeroed group (groups already recorded as scanned are still skipped)
|
||||
--end-group N Stop before group N, i.e. N itself is not scanned (for partial scans / testing)
|
||||
"""
|
||||
import argparse, sys, time
|
||||
import ext4lib
|
||||
import ext4db
|
||||
|
||||
# Device (or image file) scanned when --device is not given.
DEFAULT_DEV = "/dev/dm-0"

# Block number of the backup superblock used when --backup-sb is not given.
DEFAULT_BACKUP_SB = 32_768

# The database transaction is committed once per this many block groups.
COMMIT_INTERVAL = 20
|
||||
|
||||
|
||||
def scan_group(f, sb, gdt_data, grp, db):
    """Scan the inode table of one block group and store the results in *db*.

    Every slot whose inode parses and has a non-zero mode is classified and
    saved with ``ext4db.save_inode``.  For directories with a positive link
    count the directory entries are additionally read from the device and
    saved with ``ext4db.save_dir_entry``.  The group is marked as scanned on
    every path, including when its descriptor records no inode table.

    Read failures are tolerated (this is a best-effort recovery scan) but are
    no longer silent: a one-line summary per affected group goes to stderr.

    Args:
        f: Device or image file object, opened for binary reading by the caller.
        sb: Superblock mapping; ``desc_size``, ``inode_size`` and
            ``inodes_per_group`` are read here, and the mapping is forwarded
            to ``ext4lib.parse_inode_full``.
        gdt_data: Group descriptor data as returned by ``ext4lib.load_fs``.
        grp: Index of the block group to scan.
        db: Database handle accepted by the ``ext4db`` helpers.

    Returns:
        Number of inodes saved for this group.
    """
    desc_size = sb['desc_size']
    inode_table_block = ext4lib.parse_gdt_entry(
        gdt_data, grp * desc_size, desc_size)
    if inode_table_block == 0:
        # No inode table recorded for this group: nothing to read, but still
        # record the group so a re-run does not visit it again.
        ext4db.mark_group_scanned(db, grp)
        return 0

    # Loop invariants, looked up once.
    block_size = ext4lib.BLOCK
    inode_size = sb['inode_size']
    inodes_per_group = sb['inodes_per_group']
    inodes_per_block = block_size // inode_size
    # Table size in blocks, rounded up to a whole block.
    num_inode_blocks = (inodes_per_group * inode_size + block_size - 1) // block_size
    # Inode number of slot 0 in this group (slot 0 of group 0 is inode 1).
    first_inum = grp * inodes_per_group + 1

    found = 0
    unreadable_blocks = 0
    failed_dirs = 0

    for blk_off in range(num_inode_blocks):
        try:
            idata = ext4lib.read_at(
                f, (inode_table_block + blk_off) * block_size, block_size)
        except OSError:
            # Skip the unreadable block but keep going with the rest.
            unreadable_blocks += 1
            continue

        first_slot = blk_off * inodes_per_block
        # Because the block count is rounded up, the final block may contain
        # slots past the group's last inode.  Numbering those would produce
        # inode numbers that belong to the next group, so stop at the boundary.
        slots_in_block = min(inodes_per_block, inodes_per_group - first_slot)

        for slot_idx in range(slots_in_block):
            ino_off = slot_idx * inode_size

            inode = ext4lib.parse_inode_full(idata, ino_off, sb)
            if inode is None or inode['mode'] == 0:
                continue

            abs_inum = first_inum + first_slot + slot_idx
            status = ext4lib.classify_inode(idata, ino_off)
            ext4db.save_inode(db, abs_inum, grp, inode, status)
            found += 1

            # For directories with links, also read dir entries from disk.
            if inode['type'] == ext4lib.ITYPE_DIR and inode['links'] > 0:
                try:
                    entries = ext4lib.read_dir_entries_raw(f, idata, ino_off)
                    for name, (child_inum, ftype) in entries.items():
                        ext4db.save_dir_entry(db, abs_inum, name, child_inum, ftype)
                except Exception:
                    # Best effort: the inode itself is already saved.  Entries
                    # stored before the failure are kept; count the directory
                    # so the loss is visible in the summary below.
                    failed_dirs += 1

    if unreadable_blocks or failed_dirs:
        print(f"  group {grp}: unreadable inode-table blocks={unreadable_blocks}, "
              f"directories with unreadable entries={failed_dirs}",
              file=sys.stderr)

    ext4db.mark_group_scanned(db, grp)
    return found
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: scan inode tables into the SQLite database.

    Parses the options, opens the database and the device, loads the
    filesystem geometry through ``ext4lib.load_fs`` and calls ``scan_group``
    for every group in ``[start, end)`` that is not already recorded as
    scanned.  ``start`` defaults to ``--zeroed-groups`` and ``end`` to the
    total group count; both are clamped to the valid group range.  The
    database is committed every ``COMMIT_INTERVAL`` processed groups and once
    more at the end, after which ``ext4db.print_stats`` is called.
    """
    parser = argparse.ArgumentParser(description='Scan ext4 inodes into SQLite DB (Stage 1)')
    parser.add_argument('--device', default=DEFAULT_DEV,
                        help='Block device or image file')
    parser.add_argument('--backup-sb', type=int, default=DEFAULT_BACKUP_SB,
                        help='Block number of the backup superblock')
    parser.add_argument('--db', default='inodes.db',
                        help='Output SQLite database')
    parser.add_argument('--zeroed-groups', type=int, default=13,
                        help='First N groups are damaged; skip scan, flag parents there as orphans')
    parser.add_argument('--start-group', type=int, default=None,
                        help='Start at this group instead of --zeroed-groups '
                             '(groups already recorded as scanned are still skipped)')
    parser.add_argument('--end-group', type=int, default=None,
                        help='Stop before this group (exclusive; default: total group count)')
    args = parser.parse_args()

    # NOTE(review): the handle is never explicitly closed here; confirm
    # whether ext4db expects the caller to close it.
    db = ext4db.open_db(args.db)

    with open(args.device, 'rb') as f:
        sb, gdt_data, num_groups = ext4lib.load_fs(f, args.backup_sb)

        print(f"Geometry: {sb['blocks_per_group']} blk/grp, "
              f"{sb['inodes_per_group']} ino/grp, "
              f"inode_size={sb['inode_size']}, desc_size={sb['desc_size']}")
        # Avoid printing the nonsensical range "0–-1" when nothing is zeroed.
        if args.zeroed_groups > 0:
            zeroed_desc = f"0–{args.zeroed_groups - 1}"
        else:
            zeroed_desc = "none"
        print(f"Total groups: {num_groups} | zeroed groups: {zeroed_desc}")

        ext4db.save_fs_meta(db, sb, args.device, args.backup_sb, args.zeroed_groups)

        scanned = ext4db.get_scanned_groups(db)

        # Keep the range inside the descriptor table: an --end-group past the
        # last group or a negative start would index outside gdt_data.
        end = num_groups if args.end_group is None else min(args.end_group, num_groups)
        start = args.zeroed_groups if args.start_group is None else args.start_group
        start = max(start, 0)

        # Resolve the groups that still need work up front, so that progress,
        # rate and ETA describe real work rather than groups skipped on resume.
        pending = [grp for grp in range(start, end) if grp not in scanned]

        total_found = 0
        t0 = time.monotonic()

        for done, grp in enumerate(pending, 1):
            total_found += scan_group(f, sb, gdt_data, grp, db)

            # Cadence is based on groups processed, not on the group index,
            # so a sparse resume still commits every COMMIT_INTERVAL groups.
            if done % COMMIT_INTERVAL == 0:
                db.commit()

            if done % 100 == 0:
                elapsed = time.monotonic() - t0
                rate = done / elapsed if elapsed > 0 else 0
                eta = (len(pending) - done) / rate if rate > 0 else 0
                print(f" group {grp:6d}/{end} inodes={total_found:,} "
                      f"{rate:.1f} grp/s ETA {eta:.0f}s",
                      end='\r', flush=True)

    # Flush whatever was processed since the last periodic commit.
    db.commit()

    print("\nScan complete.")
    ext4db.print_stats(db)
|
||||
|
||||
|
||||
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user