Add raw nbd server
This commit is contained in:
435
nbd_server_raw.py
Normal file
435
nbd_server_raw.py
Normal file
@@ -0,0 +1,435 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
NBD server — raw RAID0 striping from physical disks, bypassing mdadm.
|
||||
|
||||
Opens /dev/sda, /dev/sde, /dev/sdd, /dev/sdc in read-only mode and
|
||||
implements RAID0 striping directly, then applies the ext4 chunk
|
||||
translation formula on top.
|
||||
|
||||
This allows inspection of raw physical data without any mdadm layer.
|
||||
|
||||
Disk order (RAID0): sda=0 sde=1 sdd=2 sdc=3
|
||||
|
||||
DISK_DATA_START: byte offset on each raw disk where the md0 data begins.
|
||||
Typically 0 for mdadm v0.90/v1.0, or 1048576 (2048 sectors) for v1.1/v1.2.
|
||||
Run: mdadm --examine /dev/sda | grep 'Data Offset'
|
||||
to determine the correct value before starting this server.
|
||||
|
||||
Formula modes (--formula):
|
||||
v9 : 4-chunk groups, chunk_idx==4 skipped (original working formula)
|
||||
v10 : 5-chunk groups, all chunks readable (session-2 claim, unverified)
|
||||
raw : no ext4 translation — raw md0 byte space exposed directly
|
||||
(useful for hex inspection without any interpretation layer)
|
||||
|
||||
Usage:
|
||||
# Determine disk data offset first:
|
||||
mdadm --examine /dev/sda | grep 'Data Offset'
|
||||
|
||||
# Build merged GDT (uses md0 — run before switching):
|
||||
python3 build_merged_v2.py
|
||||
|
||||
# Start raw server:
|
||||
python3 nbd_server_raw.py --formula v9 --disk-offset 1048576
|
||||
nbd-client 127.0.0.1 10810 /dev/nbd1 -N ""
|
||||
mount -o ro /dev/nbd1 /mnt/raw
|
||||
"""
|
||||
|
||||
import socket, struct, threading, sys, os, argparse
|
||||
|
||||
# --- Disk layout -----------------------------------------------------------
# RAID0 member devices; the position in this list is the disk index (0-3).
DISKS = [
    '/dev/sda',
    '/dev/sde',
    '/dev/sdd',
    '/dev/sdc',
]
NUM_DISKS = len(DISKS)
CHUNK_BYTES = 64 * 1024  # stripe chunk size: 128 sectors of 512 bytes

# Byte offset on every raw disk at which md0 byte 0 is stored.
#   0        - mdadm v0.90/v1.0 (superblock at the end of the device)
#   1048576  - usual value for v1.1/v1.2 (2048 sectors from disk start)
# The --disk-offset command-line option overrides this default.
DEFAULT_DISK_DATA_START = 0
|
||||
|
||||
# --- Filesystem / LV layout ------------------------------------------------
# Byte offset of the LV inside the md0 virtual byte space (sector 5120000).
LV_PHYS_START = 512 * 5120000

BSIZE = 4096          # filesystem block size in bytes
BPG = 32768           # presumably ext4 blocks per group - TODO confirm
GDT_ENTRY_SZ = 64     # bytes per group-descriptor entry
NUM_GROUPS = 35728    # number of group-descriptor entries

# Virtual byte range [GDT_VIRT_START, GDT_VIRT_END) that the merged GDT
# overlay replaces: it begins one block in and holds NUM_GROUPS entries.
GDT_VIRT_START = BSIZE
GDT_VIRT_END = GDT_VIRT_START + GDT_ENTRY_SZ * NUM_GROUPS
MERGED_GDT_PATH = '/tmp/merged_gdt.bin'

# Size in bytes of the device exposed over NBD (9365766144 sectors).
VIRT_SIZE = 512 * 9365766144
|
||||
|
||||
# --- NBD protocol constants ------------------------------------------------
# Handshake magics; the first two are the big-endian ASCII strings shown.
NBDMAGIC = int.from_bytes(b'NBDMAGIC', 'big')
IHAVEOPT = int.from_bytes(b'IHAVEOPT', 'big')
REPLYMAGIC = 0x0003e889045565a9

# Option codes sent by the client during negotiation.
NBD_OPT_EXPORT_NAME = 1
NBD_OPT_ABORT = 2
NBD_OPT_LIST = 3
NBD_OPT_GO = 7

# Option reply types sent by the server.
NBD_REP_ACK = 1
NBD_REP_SERVER = 2
NBD_REP_INFO = 3
NBD_REP_ERR_UNSUP = 0x80000001  # error bit (bit 31) plus code 1

# Information type carried inside an NBD_REP_INFO reply.
NBD_INFO_EXPORT = 0

# Transmission flag bits.
NBD_FLAG_HAS_FLAGS = 0x0001
NBD_FLAG_READ_ONLY = 0x0002
NBD_FLAG_SEND_FLUSH = 0x0004

# Transmission-phase magics.
NBD_REQUEST_MAGIC = 0x25609513
NBD_REPLY_MAGIC = 0x67446698

# Request commands handled by this server.
NBD_CMD_READ = 0
NBD_CMD_DISC = 2
NBD_CMD_FLUSH = 3

# Flags advertised for the single export: read-only, flush accepted.
TX_FLAGS = NBD_FLAG_HAS_FLAGS | NBD_FLAG_READ_ONLY | NBD_FLAG_SEND_FLUSH
|
||||
|
||||
# Runtime state. main() rebinds or fills these after parsing the command
# line; the values below are only the pre-startup defaults.
DISK_FDS: list = []             # open disk file objects, index == disk index
MERGED_GDT_DATA: bytes = b''    # merged GDT image; empty means no overlay
FORMULA: str = 'v9'             # one of 'v9', 'v10', 'raw'
DISK_DATA_START: int = DEFAULT_DISK_DATA_START
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Raw RAID0 layer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def md0_read(md0_offset: int, length: int) -> bytearray:
    """Read `length` bytes starting at `md0_offset` in the virtual md0 space.

    Implements RAID0 striping: consecutive CHUNK_BYTES-sized chunks rotate
    across the disks in DISK_FDS, and chunk k of a disk is stored at
    DISK_DATA_START + k * CHUNK_BYTES on that disk.

    Reads are positional (os.pread), so the file objects' own seek position
    is never touched. Client handler threads share DISK_FDS, and a
    seek-then-read pair on a shared file object could be interleaved by
    another thread.

    Args:
        md0_offset: byte offset inside the virtual md0 device.
        length: number of bytes to return.

    Returns:
        A bytearray of exactly `length` bytes. Any part a disk could not
        supply (read past the end of the device) is left as zero bytes.
    """
    result = bytearray(length)
    pos = md0_offset
    end = md0_offset + length

    while pos < end:
        chunk_num, intra = divmod(pos, CHUNK_BYTES)
        chunk_on_disk, disk_idx = divmod(chunk_num, NUM_DISKS)
        seg_len = min(CHUNK_BYTES - intra, end - pos)
        dst_off = pos - md0_offset

        disk_byte = DISK_DATA_START + chunk_on_disk * CHUNK_BYTES + intra
        fileno = DISK_FDS[disk_idx].fileno()

        # pread may return fewer bytes than requested; keep asking until the
        # segment is complete or the device reports end-of-file.
        got = 0
        while got < seg_len:
            piece = os.pread(fileno, seg_len - got, disk_byte + got)
            if not piece:
                break  # EOF: the rest of this segment stays zero-filled
            start = dst_off + got
            result[start:start + len(piece)] = piece
            got += len(piece)

        pos += seg_len

    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ext4 chunk translation layer (converts ext4 virt → md0 offset)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def ext4_to_md0_v9(virt_offset: int, length: int) -> bytearray:
    """Translate an ext4 virtual read with the v9 formula.

    The virtual space is cut into groups of five chunks. The first four
    chunks of each group map onto four consecutive md0 chunks after
    LV_PHYS_START; the fifth (chunk index 4) has no backing data and is
    returned as zeros.

    Args:
        virt_offset: byte offset in the ext4 virtual space.
        length: number of bytes to return.

    Returns:
        A bytearray of `length` bytes.
    """
    out = bytearray(length)
    group_span = 5 * CHUNK_BYTES
    cursor = virt_offset
    left = length

    while left > 0:
        group, in_group = divmod(cursor, group_span)
        chunk_idx, intra = divmod(in_group, CHUNK_BYTES)
        take = min(CHUNK_BYTES - intra, left)

        if chunk_idx != 4:
            # Only four of every five virtual chunks consume md0 space.
            src = LV_PHYS_START + (group * 4 + chunk_idx) * CHUNK_BYTES + intra
            piece = md0_read(src, take)
            at = cursor - virt_offset
            out[at:at + len(piece)] = piece
        # else: the skipped chunk keeps the zeros `out` was created with

        cursor += take
        left -= take

    return out
|
||||
|
||||
|
||||
def ext4_to_md0_v10(virt_offset: int, length: int) -> bytearray:
    """Translate an ext4 virtual read with the v10 formula.

    All five chunks of every five-chunk group are treated as data, so the
    virtual space maps contiguously onto md0 starting at LV_PHYS_START.
    The read is still issued one chunk-sized segment at a time.

    Args:
        virt_offset: byte offset in the ext4 virtual space.
        length: number of bytes to return.

    Returns:
        A bytearray of `length` bytes.
    """
    out = bytearray(length)
    group_span = 5 * CHUNK_BYTES
    cursor = virt_offset
    left = length

    while left > 0:
        group, in_group = divmod(cursor, group_span)
        chunk_idx, intra = divmod(in_group, CHUNK_BYTES)
        take = min(CHUNK_BYTES - intra, left)

        src = LV_PHYS_START + (group * 5 + chunk_idx) * CHUNK_BYTES + intra
        piece = md0_read(src, take)
        at = cursor - virt_offset
        out[at:at + len(piece)] = piece

        cursor += take
        left -= take

    return out
|
||||
|
||||
|
||||
def ext4_to_md0_raw(virt_offset: int, length: int) -> bytearray:
    """Raw mode: pass the request straight through to md0.

    No ext4 chunk translation is applied and LV_PHYS_START is not added:
    `virt_offset` is used as a byte offset into md0 counted from 0. Useful
    for hex inspection of the md0 byte space.
    """
    return md0_read(md0_offset=virt_offset, length=length)
|
||||
|
||||
|
||||
def raw_read(virt_offset: int, length: int) -> bytearray:
    """Read from the virtual device using the translation chosen in FORMULA.

    'v9' and 'v10' select their respective translators; any other value
    falls back to the untranslated raw md0 read.
    """
    translators = {
        'v9': ext4_to_md0_v9,
        'v10': ext4_to_md0_v10,
    }
    translate = translators.get(FORMULA, ext4_to_md0_raw)
    return translate(virt_offset, length)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GDT overlay
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def read_virtual(virt_offset: int, length: int) -> bytes:
    """Read from the virtual device, substituting the merged GDT if loaded.

    The data comes from raw_read(). When the formula is not 'raw' and a
    merged GDT image is loaded, the part of the request that intersects
    [GDT_VIRT_START, GDT_VIRT_END) is replaced by the matching slice of
    MERGED_GDT_DATA.

    Returns:
        The resulting data as immutable bytes.
    """
    buf = raw_read(virt_offset, length)

    # Overlay disabled: raw mode, or no merged GDT available.
    if FORMULA == 'raw' or not MERGED_GDT_DATA:
        return bytes(buf)

    req_end = virt_offset + length
    # Request lies entirely outside the primary GDT region.
    if virt_offset >= GDT_VIRT_END or req_end <= GDT_VIRT_START:
        return bytes(buf)

    lo = max(virt_offset, GDT_VIRT_START)
    hi = min(req_end, GDT_VIRT_END)
    count = hi - lo
    gdt_pos = lo - GDT_VIRT_START   # offset inside the merged GDT image
    buf_pos = lo - virt_offset      # offset inside the reply buffer
    buf[buf_pos:buf_pos + count] = MERGED_GDT_DATA[gdt_pos:gdt_pos + count]

    return bytes(buf)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# NBD protocol
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def recv_all(conn, n):
    """Receive exactly `n` bytes from `conn`.

    The data is received straight into a preallocated buffer, so each
    chunk is copied once. Growing an immutable bytes object with `+=`
    would re-copy everything received so far on every chunk.

    Args:
        conn: a connected socket (must support recv_into).
        n: number of bytes to read; values <= 0 yield b''.

    Returns:
        A bytes object of length `n`.

    Raises:
        ConnectionError: the peer closed the connection before `n` bytes
            had arrived.
    """
    if n <= 0:
        return b''

    buf = bytearray(n)
    view = memoryview(buf)
    got = 0
    while got < n:
        count = conn.recv_into(view[got:], n - got)
        if not count:
            raise ConnectionError('disconnected')
        got += count
    return bytes(buf)
|
||||
|
||||
|
||||
def send_reply(conn, opt, rtype, data=b''):
    """Send one option-negotiation reply to the client.

    Wire layout (big-endian): 64-bit REPLYMAGIC, 32-bit option code being
    answered, 32-bit reply type, 32-bit payload length, then the payload.
    The fixed 20-byte header is packed and sent in a single call instead
    of four separate small writes.

    Args:
        conn: connected client socket.
        opt: option code this reply answers.
        rtype: reply type (one of the NBD_REP_* values).
        data: optional payload bytes.
    """
    conn.sendall(struct.pack('>QIII', REPLYMAGIC, opt, rtype, len(data)))
    if data:
        conn.sendall(data)
|
||||
|
||||
|
||||
def handle_client(conn, addr):
    """Serve one NBD client: newstyle handshake, then the request loop.

    Handshake: sends the greeting, reads the client flags, and answers
    options until NBD_OPT_EXPORT_NAME or NBD_OPT_GO selects the export
    (NBD_OPT_ABORT ends the session). Transmission: answers READ via
    read_virtual(), acknowledges FLUSH, stops on DISC, and rejects
    everything else with an error reply.

    Connection errors and unexpected exceptions are logged, not raised;
    the socket is always closed on exit.

    Args:
        conn: accepted client socket.
        addr: peer address, used only in log messages.
    """
    # Protocol values needed only inside this handler.
    client_no_zeroes_bit = 1 << 1   # NBD_FLAG_C_NO_ZEROES in the client flags
    cmd_write = 1                   # NBD_CMD_WRITE
    err_eperm = 1                   # NBD_EPERM
    drain_step = 64 * 1024          # chunk size used to discard write payloads

    print(f'[nbd] {addr} connected')
    try:
        # Greeting: both magics, then handshake flags 0x0003
        # (fixed newstyle + "no zeroes" offered).
        conn.sendall(struct.pack('>QQH', NBDMAGIC, IHAVEOPT, 0x0003))
        (client_flags,) = struct.unpack('>I', recv_all(conn, 4))
        # The 124 reserved zero bytes after an EXPORT_NAME reply may only be
        # omitted when the client itself negotiated NO_ZEROES.
        send_zero_pad = not (client_flags & client_no_zeroes_bit)

        while True:
            hdr = recv_all(conn, 16)
            _, opt, opt_len = struct.unpack('>QII', hdr)
            if opt_len:
                recv_all(conn, opt_len)  # option payload is read and ignored

            if opt == NBD_OPT_EXPORT_NAME:
                reply = struct.pack('>QH', VIRT_SIZE, TX_FLAGS)
                if send_zero_pad:
                    reply += bytes(124)
                conn.sendall(reply)
                break
            elif opt == NBD_OPT_GO:
                info = struct.pack('>HQH', NBD_INFO_EXPORT, VIRT_SIZE, TX_FLAGS)
                send_reply(conn, opt, NBD_REP_INFO, info)
                send_reply(conn, opt, NBD_REP_ACK)
                break
            elif opt == NBD_OPT_LIST:
                # A single export with an empty name (name length 0).
                send_reply(conn, opt, NBD_REP_SERVER, struct.pack('>I', 0))
                send_reply(conn, opt, NBD_REP_ACK)
            elif opt == NBD_OPT_ABORT:
                send_reply(conn, opt, NBD_REP_ACK)
                return
            else:
                send_reply(conn, opt, NBD_REP_ERR_UNSUP)

        print(f'[nbd] {addr} transmission')
        while True:
            hdr = recv_all(conn, 28)
            magic, _flags, cmd, handle, offset, length = \
                struct.unpack('>IHHQQI', hdr)
            if magic != NBD_REQUEST_MAGIC:
                print(f'[nbd] {addr} bad request magic 0x{magic:08x}, closing')
                return

            if cmd == NBD_CMD_READ:
                try:
                    payload = read_virtual(offset, length)
                except Exception as e:
                    # NOTE(review): a failed read is answered as success with
                    # zero-filled data; the failure is only visible in this log.
                    print(f'[nbd] read error {offset}+{length}: {e}')
                    payload = b'\x00' * length
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
                conn.sendall(payload)
            elif cmd == NBD_CMD_FLUSH:
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 0, handle))
            elif cmd == NBD_CMD_DISC:
                print(f'[nbd] {addr} disconnected')
                return
            elif cmd == cmd_write:
                # A write request is followed by `length` payload bytes. They
                # must be consumed, or they would be parsed as the next
                # request header.
                left = length
                while left > 0:
                    left -= len(recv_all(conn, min(left, drain_step)))
                conn.sendall(
                    struct.pack('>IIQ', NBD_REPLY_MAGIC, err_eperm, handle))
            else:
                conn.sendall(struct.pack('>IIQ', NBD_REPLY_MAGIC, 1, handle))

    except (ConnectionError, BrokenPipeError, ConnectionResetError):
        print(f'[nbd] {addr} dropped')
    except Exception as e:
        print(f'[nbd] {addr} error: {e}')
        import traceback
        traceback.print_exc()
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Startup / probe helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def probe_disk_offset(disk_data_start: int):
    """Check a candidate disk data offset against the expected FAT32 VBR.

    The FAT32 VBR is expected at md0 sector 1664 (byte 851968). With
    64 KB chunks on four disks that is chunk 13: disk index 1, chunk 3 on
    that disk, i.e. physical byte disk_data_start + 196608.

    Args:
        disk_data_start: candidate byte offset of md0 data on each disk.

    Returns:
        True if the first byte read there is 0xEB, 0xFA or 0xE9 (and at
        least two bytes could be read), otherwise False. The outcome is
        also printed.
    """
    vbr_md0_byte = 851968
    chunk_num, intra = divmod(vbr_md0_byte, CHUNK_BYTES)
    chunk_on_disk, disk_idx = divmod(chunk_num, NUM_DISKS)
    disk_byte = disk_data_start + chunk_on_disk * CHUNK_BYTES + intra

    handle = DISK_FDS[disk_idx]
    handle.seek(disk_byte)
    sig = handle.read(3)
    disk_name = DISKS[disk_idx]

    looks_like_boot_sector = len(sig) >= 2 and sig[0] in (0xEB, 0xFA, 0xE9)
    if not looks_like_boot_sector:
        print(f' [probe] FAT32 signature NOT found at {disk_name}:{disk_byte}'
              f' (got {sig.hex()}) — disk-offset may be wrong')
        return False

    print(f' [probe] FAT32/boot-sector signature found at {disk_name}:'
          f'{disk_byte} — disk-offset looks correct')
    return True
|
||||
|
||||
|
||||
def main():
    """Parse arguments, open the raw disks and run the NBD server.

    Steps: parse the command line, open every device in DISKS read-only,
    optionally run only the offset probe (--probe) and exit, load the
    merged GDT overlay unless disabled, print a summary, then accept
    clients on 127.0.0.1 and serve each one in its own daemon thread.

    Exits with status 1 if a disk cannot be opened. Ctrl-C stops the
    accept loop and closes the listening socket.
    """
    global DISK_DATA_START, FORMULA, DISK_FDS, MERGED_GDT_DATA

    parser = argparse.ArgumentParser(description='Raw-disk RAID0 NBD server')
    parser.add_argument('--formula', choices=['v9', 'v10', 'raw'], default='v9',
                        help='ext4 chunk translation: v9 (4-chunk skip), '
                             'v10 (5-chunk all), raw (no translation, expose md0)')
    parser.add_argument('--disk-offset', type=int, default=DEFAULT_DISK_DATA_START,
                        metavar='BYTES',
                        help='byte offset on each raw disk where md0 data begins '
                             '(run: mdadm --examine /dev/sda | grep "Data Offset")')
    parser.add_argument('--port', type=int, default=10810,
                        help='TCP port to listen on (default 10810)')
    parser.add_argument('--no-gdt-overlay', action='store_true',
                        help='disable merged GDT overlay')
    parser.add_argument('--probe', action='store_true',
                        help='probe disk-offset via FAT32 VBR signature and exit')
    args = parser.parse_args()

    # A negative offset can never be a valid device position; reject it
    # here instead of failing later on the first read.
    if args.disk_offset < 0:
        parser.error('--disk-offset must be >= 0')

    DISK_DATA_START = args.disk_offset
    FORMULA = args.formula

    # Open all disks read-only
    print('Opening raw disks (read-only)...')
    for path in DISKS:
        try:
            fd = open(path, 'rb')
        except PermissionError:
            print(f' {path} : PERMISSION DENIED — run as root')
            sys.exit(1)
        except FileNotFoundError:
            print(f' {path} : NOT FOUND')
            sys.exit(1)
        except OSError as e:
            # Any other open failure: report it like the cases above rather
            # than dying with a traceback.
            print(f' {path} : OPEN FAILED — {e}')
            sys.exit(1)
        DISK_FDS.append(fd)
        print(f' {path} : OK')

    if args.probe:
        probe_disk_offset(DISK_DATA_START)
        for fd in DISK_FDS:
            fd.close()
        sys.exit(0)

    # Load merged GDT (optional)
    if not args.no_gdt_overlay:
        try:
            with open(MERGED_GDT_PATH, 'rb') as f:
                MERGED_GDT_DATA = f.read()
        except FileNotFoundError:
            print(f'NOTE: {MERGED_GDT_PATH} not found, GDT overlay disabled')
        else:
            expected = NUM_GROUPS * GDT_ENTRY_SZ
            if len(MERGED_GDT_DATA) == expected:
                print(f'Merged GDT loaded: {len(MERGED_GDT_DATA)//1024}KB')
            else:
                print(f'WARNING: merged GDT size mismatch '
                      f'({len(MERGED_GDT_DATA)} vs {expected}), skipping overlay')
                MERGED_GDT_DATA = b''

    # Probe sanity check (non-fatal)
    print('Probing disk-offset sanity...')
    probe_disk_offset(DISK_DATA_START)

    print()
    print('PERC H710 recovery NBD server — raw disk mode')
    print(f' disks : {" ".join(DISKS)}')
    print(f' disk-offset : {DISK_DATA_START} bytes ({DISK_DATA_START//512} sectors)')
    print(f' formula : {FORMULA}')
    print(f' virt_size : {VIRT_SIZE // 1024**3} GB')
    print(f' GDT overlay : {"yes" if MERGED_GDT_DATA else "no"}')
    print()

    srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        srv.bind(('127.0.0.1', args.port))
        srv.listen(5)
        print(f'Listening on 127.0.0.1:{args.port}')
        print(f' nbd-client 127.0.0.1 {args.port} /dev/nbd1 -N ""')
        print()

        while True:
            conn, addr = srv.accept()
            threading.Thread(target=handle_client, args=(conn, addr),
                             daemon=True).start()
    except KeyboardInterrupt:
        print()
        print('Interrupted, shutting down')
    finally:
        # The disk handles are left to process exit: daemon client threads
        # may still be reading from them at this point.
        srv.close()
|
||||
|
||||
|
||||
# Script entry point: run the server only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user