diff --git a/.gitattributes b/.gitattributes index dc32fd3..2b5083b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ tests/data/dm/* filter=lfs diff=lfs merge=lfs -text tests/data/md/* filter=lfs diff=lfs merge=lfs -text tests/data/ddf/* filter=lfs diff=lfs merge=lfs -text tests/data/lvm/* filter=lfs diff=lfs merge=lfs -text +tests/data/vinum/* filter=lfs diff=lfs merge=lfs -text diff --git a/dissect/volume/vinum/c_vinum.py b/dissect/volume/vinum/c_vinum.py new file mode 100644 index 0000000..d9043af --- /dev/null +++ b/dissect/volume/vinum/c_vinum.py @@ -0,0 +1,83 @@ +from dissect.cstruct import cstruct + +# Structures are copied from: +# https://github.com/freebsd/freebsd-src/blob/f21a6a6a8fc59393173d9a537ed8cebbdbd6343c/sys/geom/vinum/geom_vinum_var.h + +vinum_def = """ +struct timeval { + uint64 sec; + uint64 usec; +}; + +typedef uint64 off_t; + +/* + * Slice header + * + * Vinum drives start with this structure: + * + *\ Sector + * |--------------------------------------| + * | PDP-11 memorial boot block | 0 + * |--------------------------------------| + * | Disk label, maybe | 1 + * |--------------------------------------| + * | Slice definition (vinum_hdr) | 8 + * |--------------------------------------| + * | | + * | Configuration info, first copy | 9 + * | | + * |--------------------------------------| + * | | + * | Configuration info, second copy | 9 + size of config + * | | + * |--------------------------------------| + */ + +/* Sizes and offsets of our information. */ +#define GV_HDR_OFFSET 4096 /* Offset of vinum header. */ +#define GV_HDR_LEN 512 /* Size of vinum header. */ +#define GV_CFG_OFFSET 4608 /* Offset of first config copy. */ +#define GV_CFG_LEN 65536 /* Size of config copy. */ + +/* This is where the actual data starts. */ +#define GV_DATA_START (GV_CFG_LEN * 2 + GV_CFG_OFFSET) +/* #define GV_DATA_START (GV_CFG_LEN * 2 + GV_HDR_LEN) */ + +#define GV_MAXDRIVENAME 32 /* Maximum length of a device name. 
*/ + +/* + * hostname is 256 bytes long, but we don't need to shlep multiple copies in + * vinum. We use the host name just to identify this system, and 32 bytes + * should be ample for that purpose. + */ + +#define GV_HOSTNAME_LEN 32 +struct gv_label { + char sysname[GV_HOSTNAME_LEN]; /* System name at creation time. */ + char name[GV_MAXDRIVENAME]; /* Our name of the drive. */ + struct timeval date_of_birth; /* The time it was created ... */ + struct timeval last_update; /* ... and the time of last update. */ + off_t drive_size; /* Total size incl. headers. */ +}; + +#define GV_OLD_MAGIC 0x494E2056494E4F00LL +#define GV_OLD_NOMAGIC 0x4E4F2056494E4F00LL +#define GV_MAGIC 0x56494E554D2D3100LL +#define GV_NOMAGIC 0x56494E554D2D2D00LL + +/* The 'header' of each valid vinum drive. */ +struct gv_hdr { + uint64_t magic; + uint64_t config_length; + struct gv_label label; +} header; +""" # noqa W605 + +c_vinum = cstruct(endian=">").load(vinum_def) + +# Not really needed as this size is hardcoded in the various GV_*_OFFSET and related values +SECTOR_SIZE = 512 + +MAGIC_ACTIVE = {c_vinum.GV_OLD_MAGIC, c_vinum.GV_MAGIC} +MAGIC_INACTIVE = {c_vinum.GV_OLD_NOMAGIC, c_vinum.GV_NOMAGIC} diff --git a/dissect/volume/vinum/config.py b/dissect/volume/vinum/config.py new file mode 100644 index 0000000..44014f1 --- /dev/null +++ b/dissect/volume/vinum/config.py @@ -0,0 +1,425 @@ +from __future__ import annotations + +import logging +import os +import re +from dataclasses import dataclass +from datetime import datetime +from enum import Enum, auto +from typing import TypedDict + +from dissect.volume.vinum.c_vinum import c_vinum + +log = logging.getLogger(__name__) +log.setLevel(os.getenv("DISSECT_LOG_VINUM", "CRITICAL")) + + +@dataclass +class Volume: + """The representation of a Vinum Volume. + + A Vinum Volume defines a single RAID set. One or more Vinum Plexes can be + part of a Volume. 
+ """ + + timestamp: datetime + name: bytes + state: VolumeState | None = None + + +@dataclass +class Plex: + """The representation of a Vinum Plex. + + A Vinum Plex can be thought of as one of the individual disks in a mirrored + array. One or more Vinum SDs can be part of a Plex. The Plex defines the + type of RAID in which these SDs are organized. + """ + + timestamp: datetime + name: bytes | None = None + org: PlexOrg | None = None + stripesize: int | None = None + volume: bytes | None = None + state: PlexState | None = None + + +@dataclass +class SD: + """The representation of a Vinum SD. + + A Vinum SD contains information about the actual physical disk and points + to the device of this disk. + """ + + timestamp: datetime + drive: bytes + name: bytes | None = None + # length is the size in bytes of the data section on disk, so without any + # vinum headers etc. + length: int | None = None + # driveoffset is the start of the data section on disk in bytes. + driveoffset: int | None = None + plex: bytes | None = None + # plexoffset is the offset of the data section of this disk within the plex in + # bytes, e.g.: the first disk always starts at offset 0, if the size of its + # data section (SD.length) is 1024b then the plexoffset for the second disk + # will be 1024. 
+ plexoffset: int | None = None + state: SDState | None = None + + +class ParseError(Exception): + pass + + +class BytesDefaultEnum(bytes, Enum): + @classmethod + def _missing_(cls, value): + return cls._default + + +class VolumeState(BytesDefaultEnum): + DOWN = auto() + UP = b"up" + + _default = DOWN + + +class PlexState(BytesDefaultEnum): + DOWN = auto() + UP = b"up" + INITIALIZING = b"initializing" + DEGRADED = b"degraded" + GROWABLE = b"growable" + + _default = DOWN + + +class PlexOrg(BytesDefaultEnum): + DISORG = auto() + CONCAT = b"concat" + STRIPED = b"striped" + RAID5 = b"raid5" + + _default = DISORG + + +class SDState(BytesDefaultEnum): + DOWN = auto() + UP = b"up" + INITIALIZING = b"initializing" + DEGRADED = b"degraded" + GROWABLE = b"growable" + + _default = DOWN + + +def _parse_size(size: bytes) -> int: + # Only the first byte after the numerals (and optional minus sign) should + # be considered. + postfix = size.lstrip(b"-0123456789") + if postfix: + numeral = size[: -len(postfix)] + else: + numeral = size + unit = postfix[:1] + + try: + size = int(numeral) + except ValueError: + # If there are no numerals (numeral is empty or the minus sign), the + # size should be parsed as 0. + size = 0 + else: + if unit: + # Invalid units should be ignored and size is returned as is. 
+ if unit in (b"b", b"B", b"s", b"S"): + size = size * 512 # Yes also for b/B + elif unit in (b"k", b"K"): + size = size * 1024 + elif unit in (b"m", b"M"): + size = size * 1024 * 1024 + elif unit in (b"g", b"G"): + size = size * 1024 * 1024 * 1024 + + return size + + +def _parse_volume_config(config_time: datetime, tokens: list[bytes]) -> Volume | None: + volume = None + name = None + state = None + + tokens = iter(tokens) + token = next(tokens, None) + try: + while token is not None: + if token == b"state": + state = VolumeState(next(tokens)) + else: + name = token + token = next(tokens, None) + except StopIteration: + log.debug("No value for token %r, ignoring volume config", token) + else: + if name is None: + log.debug("No name found for volume, ignoring volume config") + else: + volume = Volume( + timestamp=config_time, + name=name, + state=state, + ) + + return volume + + +def _parse_plex_config(config_time: datetime, tokens: list[bytes]) -> Plex | None: + plex = None + name = None + org = None + stripesize = None + volume = None + state = None + + tokens = iter(tokens) + token = next(tokens, None) + try: + while token is not None: + if token == b"name": + name = next(tokens) + elif token == b"org": + org = PlexOrg(next(tokens)) + if org == PlexOrg.RAID5 or org == PlexOrg.STRIPED: + stripesize = _parse_size(next(tokens)) + # the kernel parser only checks on == 0, but < 0 also seems unreasonable + if stripesize <= 0: + raise ParseError(f"Invalid stripesize: {stripesize}") + elif token == b"vol" or token == b"volume": + volume = next(tokens) + elif token == b"state": + state = PlexState(next(tokens)) + else: + raise ParseError(f"Unknown token {token}") + + token = next(tokens, None) + + except (StopIteration, ParseError) as err: + if isinstance(err, StopIteration): + log.debug("No value for token %r, ignoring plex config", token) + else: + log.debug("%s, ignoring plex config", err) + + else: + plex = Plex( + timestamp=config_time, + name=name, + org=org, + 
+ stripesize=stripesize, + volume=volume, + state=state, + ) + + return plex + + +def _parse_sd_config(config_time: datetime, tokens: list[bytes]) -> SD | None: + sd = None + name = None + drive = None + length = None + driveoffset = None + plex = None + plexoffset = None + state = None + + tokens = iter(tokens) + token = next(tokens, None) + try: + while token is not None: + if token == b"name": + name = next(tokens) + elif token == b"drive": + drive = next(tokens) + elif token == b"len" or token == b"length": + length = _parse_size(next(tokens)) + if length < 0: + length = -1 + elif token == b"driveoffset": + driveoffset = _parse_size(next(tokens)) + if driveoffset != 0 and driveoffset < c_vinum.GV_DATA_START: + raise ParseError(f"Invalid driveoffset: {driveoffset}") + elif token == b"plex": + plex = next(tokens) + elif token == b"plexoffset": + plexoffset = _parse_size(next(tokens)) + if plexoffset < 0: + raise ParseError(f"Invalid plexoffset: {plexoffset}") + elif token == b"state": + state = SDState(next(tokens)) + else: + raise ParseError(f"Unknown token {token}") + + token = next(tokens, None) + + except (StopIteration, ParseError) as err: + if isinstance(err, StopIteration): + log.debug("No value for token %r, ignoring sd config", token) + else: + log.debug("%s, ignoring sd config", err) + + else: + if drive is None: + log.debug("No drive found for sd, ignoring sd config") + else: + sd = SD( + timestamp=config_time, + name=name, + drive=drive, + length=length, + driveoffset=driveoffset, + plex=plex, + plexoffset=plexoffset, + state=state, + ) + + return sd + + +def get_char(line: bytes, idx: int) -> bytes: + """Return a single byte bytestring at index ``idx`` in ``line``. + + If the index is outside of the boundaries of ``line``, an empty bytestring + will be returned. 
+ """ + char = b"" + if idx >= 0 and idx < len(line): + char = line[idx : idx + 1] # this makes sure we get a single byte bytestring + return char + + +class TokenizeError(Exception): + pass + + +def tokenize(line: bytes) -> iter[bytes]: + """Yield individual tokens from a vinum config line. + + This token parser is constructed to be equivalent to the token parser used in the + FreeBSD kernel code. There are a few caveats though: + + - it expects lines to be pre-split on newline and null-byte characters + - it does not attempt to parse quoted tokens, as the code in the kernel parser is + buggy and will always lead to an error condition (it will mimic the error condition + though). + """ + whitespace = {b" ", b"\t"} + quotes = {b'"', b"'"} + comment = {b"#"} + eol = {b""} + end_of_list = eol.union(comment) + end_of_token = whitespace.union(eol) + + token = b"" + idx = 0 + while True: + char = get_char(line, idx) + + while char in whitespace: + # Remove leading whitespace up to the next token or end_of_list condition + idx += 1 + char = get_char(line, idx) + + if char in end_of_list: + # We are at the end of the token list (a comment or end of line). + break + + if char in quotes: + # Encountering a quoted token will always lead to an error + # condition in the (Free)BSD vinum kernel code. This is a bug in + # that code, which we mimic here. + raise TokenizeError(f"Found quoted token at index {idx}") + + while char not in end_of_token: + # Add characters to the token until we encounter a stop condition. + # Note that comment and quote characters are allowed in a token as + # long as they are not preceded by whitespace. 
+ token += char + idx += 1 + char = get_char(line, idx) + + if token: + yield token + token = b"" + + idx += 1 + + +class VinumConfigs(TypedDict): + volumes: list[Volume] + plexes: list[Plex] + sds: list[SD] + + +RE_CONFIG_EOL = re.compile(b"[\x00\n]") + + +TOKEN_CONFIG_MAP = { + b"volume": "volumes", + b"plex": "plexes", + b"sd": "sds", +} + + +def parse_vinum_config(config_time: datetime, config: bytes) -> VinumConfigs: + """Parse the on-disk vinum configuration. + + Parsing forgiveness and strictness is implemented in the same way as in the vinum kernel code: + + Lines with an unknown configuration "type" (not b"volume", b"plex" or b"sd"), are ignored. + + Lines that fail to parse due to: + - no name present + - no value present for a token + - unknown token name + - a tokenization error + + will fail that line and the subsequent lines (rest of the config) to not being parsed. + """ + config_data: VinumConfigs = { + "volumes": [], + "plexes": [], + "sds": [], + } + + for line in RE_CONFIG_EOL.split(config): + try: + tokens = tokenize(line) + token = next(tokens, None) + if token is None: + # We encountered a line without tokens (empty, just whitespace or # comments) + continue + if token == b"volume": + parsed_config = _parse_volume_config(config_time, tokens) + elif token == b"plex": + parsed_config = _parse_plex_config(config_time, tokens) + elif token == b"sd": + parsed_config = _parse_sd_config(config_time, tokens) + else: + parsed_config = None + log.debug("Unknown config type in line: %r, ignoring config line", line) + + if parsed_config: + config_type = TOKEN_CONFIG_MAP[token] + config_data[config_type].append(parsed_config) + else: + log.debug("Invalid config line %r", line) + log.debug("Ignoring this line and the rest of the config data") + break + except TokenizeError as err: + log.debug("Invalid config line %r: %s", line, err) + log.debug("Ignoring this line and the rest of the config data") + break + + return config_data diff --git 
a/dissect/volume/vinum/vinum.py b/dissect/volume/vinum/vinum.py new file mode 100644 index 0000000..abe2d4c --- /dev/null +++ b/dissect/volume/vinum/vinum.py @@ -0,0 +1,349 @@ +# For more information see: +# https://docs.freebsd.org/en/articles/vinum/ +# https://www.usenix.org/legacy/events/usenix99/full_papers/lehey/lehey.pdf +from __future__ import annotations + +import io +import logging +import os +from collections import defaultdict +from functools import cached_property +from typing import TYPE_CHECKING, BinaryIO, TypedDict, TypeVar + +from dissect.util import ts + +from dissect.volume.raid.raid import ( + RAID, + Configuration, + DiskMap, + PhysicalDisk, + VirtualDisk, +) +from dissect.volume.raid.stream import Layout, Level +from dissect.volume.vinum.c_vinum import MAGIC_ACTIVE, MAGIC_INACTIVE, c_vinum +from dissect.volume.vinum.config import ( + SD, + Plex, + PlexOrg, + PlexState, + SDState, + Volume, + parse_vinum_config, +) + +if TYPE_CHECKING: + VinumPhysicalDiskDescriptor = BinaryIO | "VinumPhysicalDisk" + +log = logging.getLogger(__name__) +log.setLevel(os.getenv("DISSECT_LOG_VINUM", "CRITICAL")) + + +class Vinum(RAID): + """Read Vinum RAID sets of one or multiple devices/file-like objects. + + Use this class to read from Vinum RAID sets. + + A single Vinum RAID set is defined by a Volume in the Vinum configuration. + This configuration is present on all physical disks and contains + information on all the RAID sets in the system A Vinum Volume can have one + or more Plexes. + + A Plex can be thought of as one of the individual disks in a mirrored + array. A Plex can contain one or more Vinum SDs. The Plex defines the type + of RAID in which these SDs are organized. + + An SD contains information about the actual physical disk and points to the + device of this disk. + + Args: + fh: A single file-like object or :class:`VinumPhysicalDisk`, or a list + of multiple belonging to the same RAID set. 
+ """ + + def __init__(self, fh: list[VinumPhysicalDiskDescriptor] | VinumPhysicalDiskDescriptor): + fhs = [fh] if not isinstance(fh, list) else fh + physical_disks = [VinumPhysicalDisk(fh) if not isinstance(fh, VinumPhysicalDisk) else fh for fh in fhs] + + super().__init__([VinumConfiguration(physical_disks)]) + + +T = TypeVar("T") +ByName = dict[bytes, T] +DefaultByName = defaultdict[bytes, T] + + +class Config(TypedDict): + volumes: ByName[Volume] + plexes: ByName[Plex] + sds: ByName[SD] + + +class VinumConfiguration(Configuration): + def __init__(self, physical_disks: list[VinumPhysicalDisk]): + # These hold the most recent config for each volume/plex/sd + config: Config = {"volumes": {}, "plexes": {}, "sds": {}} + disks_by_name: ByName[VinumPhysicalDisk] = {} + + # Find the most recent configuration for each Volume/Plex/SD by merging + # all configs from all physical disks. + for disk in physical_disks: + if not disk.active: + # Assuming here that if a disk is marked as inactive, it's + # configuration is also old and/or possibly inaccurate. + continue + + disks_by_name[disk.id] = disk + + # For now we only use the first config block. The second config block + # could be useful/used as a fallback if for instance the first one + # fails to parse. + # + # The self.header.label.last_update timestamp is used to see if this + # disk contains the latest/newest config of the disks in a set. The + # disk is ignored if there is no header (which in our case would have + # raised a ValueError) or if the state is not set to GV_DRIVE_UP (an + # internal kernel state which we don't have and thus can ignore). + # + # Plexes with a non-existing Volume and SDs with a non-existing Plex or + # Drive (VinumPhysicalDisk) can not be used. They result in an error in + # the FreeBSD kernel code. 
+ config_data = parse_vinum_config(disk.config_time, disk.config) + + for config_type, new_items in config_data.items(): + cur_config = config[config_type] + + for new_item in new_items: + if cur_item := cur_config.get(new_item.name): + if new_item.timestamp > cur_item.timestamp: + cur_config[new_item.name] = new_item + else: + cur_config[new_item.name] = new_item + + # plexes_by_name contains all *active* plexes + plexes_by_name: ByName[Plex] = dict() + + # plexes_by_volume contains all *active* plexes grouped by the name of + # the volume they belong to + plexes_by_volume: DefaultByName[list[Plex]] = defaultdict(list) + # sds_by_plex_by_volume contains all sds for all *active* plexes + # grouped by plex and volume name + sds_by_plex_by_volume: DefaultByName[DefaultByName[list[SD]]] = defaultdict(lambda: defaultdict(list)) + + # Check whether the found plexes are viable and filter them out if they + # are not. + # Note that if a complete Volume is down, we still try to use it (we don't check on Volume.state). + for plex in config["plexes"].values(): + if config["volumes"].get(plex.volume): + # A plex in the DOWN state is probably not fit to use + if plex.state != PlexState.DOWN: + if plex.org != PlexOrg.DISORG: + plexes_by_name[plex.name] = plex + plexes_by_volume[plex.volume].append(plex) + else: + log.warning("Plex %r has an unknown organisation, ignoring plex", plex.name) + else: + log.warning("Plex %r is down, ignoring plex", plex.name) + else: + log.warning("Unknown volume %r for plex %r, ignoring plex", plex.volume, plex.name) + + # Check whether the found sds are viable and filter them out if they + # are not. + for sd in config["sds"].values(): + if plex := plexes_by_name.get(sd.plex): + sds_by_plex_by_volume[plex.volume][sd.plex].append(sd) + else: + log.warning("Unknown or inactive plex %r for sd %r, ignoring sd", sd.plex, sd.name) + + # The construction of the disk_map assumes the configuration + # information is complete and no sd configs are missing. 
+ # Note that there are no checks done on the completeness of the sd + # values, these can theoretically be None due to a corrupt + # configuration. + disk_map_by_plex_by_volume: DefaultByName[DefaultByName[list[SD]]] = defaultdict(lambda: defaultdict(list)) + for volume_id, sds_by_plex in sds_by_plex_by_volume.items(): + for plex_id, sds in sds_by_plex.items(): + sds = sorted(sds, key=lambda sd: sd.plexoffset) + disk_map = {} + for idx, sd in enumerate(sds): + if ( + sd.state != SDState.DOWN and sd.state != SDState.DEGRADED and sd.state != SDState.INITIALIZING + ): # sd's in these states are probably not fit to use + if sd.drive in disks_by_name: + disk_map[idx] = (0, disks_by_name[sd.drive]) + else: + log.warning("Physical disk %r for sd %r is missing, ignoring sd", sd.drive, sd.name) + else: + log.warning("SD %r is not in a usable state: %r, ignoring sd", sd.name, sd.state) + if disk_map: + disk_map_by_plex_by_volume[volume_id][plex_id] = disk_map + + # Each volume represents a separate virtual disk + virtual_disks = [] + for volume_id, plexes in plexes_by_volume.items(): + volume = config["volumes"][volume_id] + if plexes: + # Special case if there is only 1 plex (no mirroring) + if len(plexes) == 1: + plex = plexes[0] + disk_map = disk_map_by_plex_by_volume[volume.name][plex.name] + if disk_map: + sds = sds_by_plex_by_volume[volume.name][plex.name] + virtual_disks.append(VinumPlexDisk(volume, plex, sds, disk_map)) + else: + plex_map = {} + # There is no official order in the plexes. However if they + # are named automatically, they have a pseudo order due to + # their names being constructed with p0, p1, etc. 
+ splexes = sorted(plexes, key=lambda plex: plex.name) + for idx, plex in enumerate(splexes): + disk_map = disk_map_by_plex_by_volume[volume.name][plex.name] + if disk_map: + sds = sds_by_plex_by_volume[volume.name][plex.name] + plex_disk = VinumPlexDisk(volume, plex, sds, disk_map) + plex_map[idx] = (0, plex_disk) + + if plex_map: + virtual_disks.append(VinumMirrorDisk(volume, plexes, plex_map)) + + else: + log.warning("Volume %r has no or only inactive plexes, ignoring volume", volume.name) + + if not virtual_disks: + raise ValueError( + "Invalid vinum raid configuration, no volumes found with an active and complete set of disks" + ) + + super().__init__(physical_disks, virtual_disks) + + +org_to_level = { + PlexOrg.CONCAT: Level.LINEAR, + PlexOrg.STRIPED: Level.RAID0, + PlexOrg.RAID5: Level.RAID5, +} + +org_to_layout = { + PlexOrg.CONCAT: 0, + PlexOrg.STRIPED: 0, + PlexOrg.RAID5: Layout.LEFT_ASYMMETRIC, +} + + +class VinumPlexDisk(VirtualDisk): + def __init__( + self, + volume: Volume, + plex: Plex, + sds: list[SD], + disk_map: DiskMap, + ): + self.volume = volume + self.plex = plex + self.sds = sds + + if (level := org_to_level.get(plex.org)) is None: + raise ValueError(f"Plex {plex.name} has an unsupported RAID level: {plex.org}") + if (layout := org_to_layout.get(plex.org)) is None: + raise ValueError(f"Plex {plex.name} has an unsupported RAID level: {plex.org}") + if plex.org == PlexOrg.CONCAT: + stripe_size = 0 # concatenated disks don't have stripes + else: + stripe_size = plex.stripesize + + size = 0 + sd = sds[0] + if plex.org == PlexOrg.RAID5: + # SDs in a vinum RAID5 org are required to have equal size, so we + # don't need to determine the smallest disk. 
+ size = (len(sds) - 1) * sd.length + elif plex.org == PlexOrg.STRIPED: + # SDs in a vinum STRIPED org are required to have equal size, so we + # can just multiply + size = len(sds) * sd.length + else: + for sd in sds: + size += sd.length + + super().__init__( + volume.name.decode(errors="backslashreplace"), + plex.name.decode(errors="surrogateescape"), + size, + level, + layout, + stripe_size, + len(sds), + disk_map, + ) + + +class VinumMirrorDisk(VirtualDisk): + def __init__( + self, + volume: Volume, + plexes: list[Plex], + plex_map: DiskMap, + ): + self.volume = volume + self.plexes = plexes + + # The VinumConfiguration class will make sure there is at least 1 plex disk. + _, plex_disk = next(iter(plex_map.values())) + size = plex_disk.size + super().__init__( + volume.name.decode(errors="backslashreplace"), + volume.name.decode(errors="surrogateescape"), + size, + Level.RAID1, + 0, # simple mirrors don't have a layout + 0, # simple mirrors don't have a stripe size + len(plexes), + plex_map, + ) + + +class VinumPhysicalDisk(PhysicalDisk): + """Parse config from a Vinum device. + + Args: + fh: The file-like object to read config from. 
+ """ + + def __init__(self, fh: BinaryIO): + self.fh = fh + + fh.seek(c_vinum.GV_HDR_OFFSET) + self.header = c_vinum.header(fh) + + if self.header.magic in MAGIC_ACTIVE: + self.active = True + elif self.header.magic in MAGIC_INACTIVE: + self.active = False + else: + raise ValueError("File-like object is not a Vinum device") + + self.id = self.header.label.name.rstrip(b"\x00") + self.name = self.header.label.name.rstrip(b"\x00").decode(errors="backslashreplace") + + last_update = self.header.label.last_update + config_epoch = last_update.sec + last_update.usec * 1e-6 + self.config_time = ts.from_unix(config_epoch) + + size = self.header.label.drive_size + if not size: + fh.seek(0, io.SEEK_END) + size = fh.tell() + + super().__init__(fh, c_vinum.GV_DATA_START, size) + + def _read_config(self, config_offset) -> bytes: + self.fh.seek(config_offset) + config = self.fh.read(self.header.config_length) + return config + + @cached_property + def config(self) -> bytes: + return self._read_config(c_vinum.GV_CFG_OFFSET) + + @cached_property + def config2(self) -> bytes: + return self._read_config(c_vinum.GV_CFG_OFFSET + c_vinum.GV_CFG_LEN) diff --git a/tests/conftest.py b/tests/conftest.py index 7d97a9d..4c3f668 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -193,3 +193,57 @@ def ddf_raid10() -> Iterator[list[BinaryIO]]: "data/ddf/ddf-raid10-4.bin.gz", ] ) + + +@pytest.fixture +def vinum_concat() -> Iterator[list[BinaryIO]]: + yield from open_files_gz( + [ + "data/vinum/vinum-concat_diska.bin.gz", + "data/vinum/vinum-concat_diskb.bin.gz", + ] + ) + + +@pytest.fixture +def vinum_mirror() -> Iterator[list[BinaryIO]]: + yield from open_files_gz( + [ + "data/vinum/vinum-mirror_diska.bin.gz", + "data/vinum/vinum-mirror_diskb.bin.gz", + ] + ) + + +@pytest.fixture +def vinum_raid5() -> Iterator[list[BinaryIO]]: + yield from open_files_gz( + [ + "data/vinum/vinum-raid5_diska.bin.gz", + "data/vinum/vinum-raid5_diskb.bin.gz", + "data/vinum/vinum-raid5_diskc.bin.gz", + 
"data/vinum/vinum-raid5_diskd.bin.gz", + ] + ) + + +@pytest.fixture +def vinum_striped() -> Iterator[list[BinaryIO]]: + yield from open_files_gz( + [ + "data/vinum/vinum-striped_diska.bin.gz", + "data/vinum/vinum-striped_diskb.bin.gz", + ] + ) + + +@pytest.fixture +def vinum_stripedmirror() -> Iterator[list[BinaryIO]]: + yield from open_files_gz( + [ + "data/vinum/vinum-stripedmirror_diska.bin.gz", + "data/vinum/vinum-stripedmirror_diskb.bin.gz", + "data/vinum/vinum-stripedmirror_diskc.bin.gz", + "data/vinum/vinum-stripedmirror_diskd.bin.gz", + ] + ) diff --git a/tests/data/vinum/vinum-concat_diska.bin.gz b/tests/data/vinum/vinum-concat_diska.bin.gz new file mode 100644 index 0000000..1bfea3f --- /dev/null +++ b/tests/data/vinum/vinum-concat_diska.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f1d9b6dc45b100dedab9e1ac42e581197d2da03c967151caf811ad9b24b14c +size 7755 diff --git a/tests/data/vinum/vinum-concat_diskb.bin.gz b/tests/data/vinum/vinum-concat_diskb.bin.gz new file mode 100644 index 0000000..c5f4362 --- /dev/null +++ b/tests/data/vinum/vinum-concat_diskb.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851db41ac25e33ec1dcd21c09c57f45123ff81cc5852aca616bbd6b82b16988f +size 6281 diff --git a/tests/data/vinum/vinum-mirror_diska.bin.gz b/tests/data/vinum/vinum-mirror_diska.bin.gz new file mode 100644 index 0000000..ad26927 --- /dev/null +++ b/tests/data/vinum/vinum-mirror_diska.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6269c79ba38ca2563c297e21b1e7210428d36cdb04c4bf0c3f03273771671e4a +size 6387 diff --git a/tests/data/vinum/vinum-mirror_diskb.bin.gz b/tests/data/vinum/vinum-mirror_diskb.bin.gz new file mode 100644 index 0000000..9a2a2e8 --- /dev/null +++ b/tests/data/vinum/vinum-mirror_diskb.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54569b1ea1dc8ecbf0bf7935eebd441cd35874c7e6d2b76cf9459c85d94b6bf +size 6387 diff --git 
a/tests/data/vinum/vinum-raid5_diska.bin.gz b/tests/data/vinum/vinum-raid5_diska.bin.gz new file mode 100644 index 0000000..1d1ab4c --- /dev/null +++ b/tests/data/vinum/vinum-raid5_diska.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fed05336eae2173ca928f92db4d603febe249f0f53e991fc1cfa4a8b0d58857 +size 5851 diff --git a/tests/data/vinum/vinum-raid5_diskb.bin.gz b/tests/data/vinum/vinum-raid5_diskb.bin.gz new file mode 100644 index 0000000..e35d55b --- /dev/null +++ b/tests/data/vinum/vinum-raid5_diskb.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dacec73cbe020d7b5b891c35a15b228eedd2e1f72aa15dc9d86485a58fc1510 +size 7293 diff --git a/tests/data/vinum/vinum-raid5_diskc.bin.gz b/tests/data/vinum/vinum-raid5_diskc.bin.gz new file mode 100644 index 0000000..2f8b317 --- /dev/null +++ b/tests/data/vinum/vinum-raid5_diskc.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1667cec201916e706c358d71fd5973ffde9ee76f3baec07f61fcd8bbab53e6f8 +size 6305 diff --git a/tests/data/vinum/vinum-raid5_diskd.bin.gz b/tests/data/vinum/vinum-raid5_diskd.bin.gz new file mode 100644 index 0000000..450cdd3 --- /dev/null +++ b/tests/data/vinum/vinum-raid5_diskd.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ca3ac49e174722e3951aeca60910a0d57bfc88bfd9755f7c121cec3fb06e61 +size 7893 diff --git a/tests/data/vinum/vinum-striped_diska.bin.gz b/tests/data/vinum/vinum-striped_diska.bin.gz new file mode 100644 index 0000000..f9ad46c --- /dev/null +++ b/tests/data/vinum/vinum-striped_diska.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a623a9185ad246ba7a078a4c0d44d356d8af3728ea1e14c947036e71420e5b97 +size 6685 diff --git a/tests/data/vinum/vinum-striped_diskb.bin.gz b/tests/data/vinum/vinum-striped_diskb.bin.gz new file mode 100644 index 0000000..6b9e26d --- /dev/null +++ b/tests/data/vinum/vinum-striped_diskb.bin.gz @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:be715b13ca0e930faac63215e8b3e4c8672ce1ea499d017e3e19934b0b8b975f +size 4033 diff --git a/tests/data/vinum/vinum-stripedmirror_diska.bin.gz b/tests/data/vinum/vinum-stripedmirror_diska.bin.gz new file mode 100644 index 0000000..b6435b9 --- /dev/null +++ b/tests/data/vinum/vinum-stripedmirror_diska.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b722a98ab240c9e17e414f23b39d90abfcb4adc8ba1742f083e9e8307381ce +size 7309 diff --git a/tests/data/vinum/vinum-stripedmirror_diskb.bin.gz b/tests/data/vinum/vinum-stripedmirror_diskb.bin.gz new file mode 100644 index 0000000..27b0b1b --- /dev/null +++ b/tests/data/vinum/vinum-stripedmirror_diskb.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e729b14a50d122e5ccfcab1f5df7a4f3bd4997a1984680e3d116cf877e5edc3f +size 7309 diff --git a/tests/data/vinum/vinum-stripedmirror_diskc.bin.gz b/tests/data/vinum/vinum-stripedmirror_diskc.bin.gz new file mode 100644 index 0000000..f597ae6 --- /dev/null +++ b/tests/data/vinum/vinum-stripedmirror_diskc.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eddd620de9b1893344a186d05da1f0a8dfa0f65814cd4a2b85ded62ce6ac5cc +size 4461 diff --git a/tests/data/vinum/vinum-stripedmirror_diskd.bin.gz b/tests/data/vinum/vinum-stripedmirror_diskd.bin.gz new file mode 100644 index 0000000..ffc7206 --- /dev/null +++ b/tests/data/vinum/vinum-stripedmirror_diskd.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c511528c80098624d253627eaf2e0c52c4a40a82ac429292fc99e8f15018e3c2 +size 4461 diff --git a/tests/test_vinum.py b/tests/test_vinum.py new file mode 100644 index 0000000..5490fa0 --- /dev/null +++ b/tests/test_vinum.py @@ -0,0 +1,238 @@ +from datetime import datetime, timezone +from io import BytesIO +from typing import BinaryIO + +import pytest + +from dissect.volume.raid.stream import Layout, Level +from dissect.volume.vinum.c_vinum import 
c_vinum +from dissect.volume.vinum.vinum import ( + Vinum, + VinumMirrorDisk, + VinumPhysicalDisk, + VinumPlexDisk, +) + + +@pytest.mark.parametrize( + ( + "disk_files", + "name", + "uuid", + "size", + "is_mirror", + "level", + "layout", + "stripe_size", + "num_disks", + "physical_disks", + "read_offset", + ), + [ + ( + "vinum_concat", + "my-concat-vol", + "my-concat-vol.p0", + 1825792, + False, + Level.LINEAR, + 0, + 0, + 2, + ( + ( + (0, 0, b"gvinumdrive2", datetime(2024, 10, 21, 13, 14, 44, 653168, tzinfo=timezone.utc)), + (1, 0, b"gvinumdrive3", datetime(2024, 10, 21, 13, 14, 44, 653168, tzinfo=timezone.utc)), + ), + ), + 1024 * 1024, + ), + ( + "vinum_mirror", + "my-mirror-vol", + "my-mirror-vol", + 912896, + True, + Level.LINEAR, + 0, + 0, + 1, + ( + ((0, 0, b"gvinumdrive8", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)),), + ((0, 0, b"gvinumdrive9", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)),), + ), + 512 * 1024, + ), + ( + "vinum_raid5", + "my-raid5-vol", + "my-raid5-vol.p0", + 2359296, + False, + Level.RAID5, + 0, + 262144, + 4, + ( + ( + (0, 0, b"gvinumdrive4", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (1, 0, b"gvinumdrive5", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (2, 0, b"gvinumdrive6", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (3, 0, b"gvinumdrive7", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + ), + ), + 1024 * 1024, + ), + ( + "vinum_striped", + "my-striped-vol", + "my-striped-vol.p0", + 1572864, + False, + Level.RAID0, + 0, + 262144, + 2, + ( + ( + (0, 0, b"gvinumdrive0", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (1, 0, b"gvinumdrive1", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + ), + ), + 1024 * 1024, + ), + ( + "vinum_stripedmirror", + "my-stripedmirror-vol", + "my-stripedmirror-vol", + 1572864, + True, + Level.RAID0, + 0, + 262144, + 2, + ( + ( + (0, 0, 
b"gvinumdrive10", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (1, 0, b"gvinumdrive12", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + ), + ( + (0, 0, b"gvinumdrive11", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + (1, 0, b"gvinumdrive13", datetime(2024, 9, 24, 12, 20, 54, 919756, tzinfo=timezone.utc)), + ), + ), + 1024 * 1024, + ), + ], +) +def test_vinum( + disk_files: str, + name: bytes, + uuid: bytes, + size: int, + is_mirror: bool, + level: Level, + layout: Layout, + stripe_size: int, + num_disks: int, + physical_disks: tuple[tuple[tuple[int, int, bytes, datetime]]], # disk_idx, data_offset, name, config_time + read_offset: int, + request: pytest.FixtureRequest, +) -> None: + disk_files: list[BinaryIO] = request.getfixturevalue(disk_files) + vinum = Vinum(disk_files) + + assert len(vinum.configurations) == 1 + assert len(vinum.configurations[0].virtual_disks) == 1 + + vd = vinum.configurations[0].virtual_disks[0] + assert vd.name == name + assert vd.uuid == uuid + assert vd.size == size + + if is_mirror: + # these are always the same for any mirror type + assert vd.level == Level.RAID1 + assert isinstance(vd, VinumMirrorDisk) + assert vd.layout == 0 + assert vd.stripe_size == 0 + assert vd.num_disks == 2 + assert len(vd.disk_map) == 2 + disk_map = vd.disk_map + else: + # fake disk map + disk_map = {0: (0, vd)} + + check_idx = 0 + for idx_i, (disk_offset_i, vd_i) in disk_map.items(): + # as the indexes in a disk_map for a mirror set are artificial, they + # should increase monotonically from 0.
+ assert idx_i == check_idx + check_idx += 1 + + if is_mirror: + # these are always the same for any mirror type and virtual disk in the mirror set + assert disk_offset_i == 0 + assert isinstance(vd_i, VinumPlexDisk) + vd_i_uuid = f"{uuid}.p{idx_i}" + assert vd_i.name == name + assert vd_i.uuid == vd_i_uuid + assert vd_i.size == size # Always the same as vd.size + + # in our case, for mirror sets these are the same for each virtual disk in the set + assert vd_i.level == level + assert vd_i.layout == layout + assert vd_i.stripe_size == stripe_size + assert vd_i.num_disks == num_disks + assert len(vd_i.disk_map) == num_disks + + pdisks = physical_disks[idx_i] + vpds = sorted(vd_i.disk_map.items()) + for idx, (disk_idx, (disk_offset, vpd)) in enumerate(vpds): + # these happen to be always the same for our test images + assert isinstance(vpd, VinumPhysicalDisk) + assert vpd.offset == 135680 + assert vpd.size == 1048576 + + # these are always specific for any physical disk in the virtual + # disk, also for the disks in the virtual disks of a mirror set + pdisk = pdisks[idx] + assert disk_idx == pdisk[0] + assert disk_offset == pdisk[1] + assert vpd.id == pdisk[2] + assert vpd.name == pdisk[2].decode(errors="backslashreplace") + assert vpd.config_time == pdisk[3] + + disk = vd.open() + disk.seek(read_offset) + data = disk.read(256) + expected = bytearray(ii for ii in range(256)) + assert data == expected + + +@pytest.mark.parametrize( + ("magic", "active"), + [ + (0x494E2056494E4F00, True), + (0x4E4F2056494E4F00, False), + (0x56494E554D2D3100, True), + (0x56494E554D2D2D00, False), + ], +) +def test_vinum_physical_disk(magic, active): + header = c_vinum.header() + header.magic = magic + + fake_disk = BytesIO(b"\x00" * c_vinum.GV_HDR_OFFSET + bytes(header)) + vpd = VinumPhysicalDisk(fake_disk) + + assert vpd.active == active + + +def test_vinum_physical_disk_invalid(): + header = c_vinum.header() + header.magic = 0xDEADCAFEDEADCAFE + + fake_disk = BytesIO(b"\x00" * 
c_vinum.GV_HDR_OFFSET + bytes(header)) + with pytest.raises(ValueError): + VinumPhysicalDisk(fake_disk) diff --git a/tests/test_vinum_config.py b/tests/test_vinum_config.py new file mode 100644 index 0000000..8f2894e --- /dev/null +++ b/tests/test_vinum_config.py @@ -0,0 +1,460 @@ +from __future__ import annotations + +import logging +from datetime import datetime +from unittest.mock import patch + +import pytest + +from dissect.volume.vinum.config import ( + SD, + Plex, + PlexOrg, + PlexState, + SDState, + TokenizeError, + VinumConfigs, + Volume, + VolumeState, + _parse_plex_config, + _parse_sd_config, + _parse_size, + _parse_volume_config, + get_char, + log, + parse_vinum_config, + tokenize, +) + +CONF_TS = datetime.min + + +def test_volume_state() -> None: + assert VolumeState.UP == VolumeState(b"up") + assert VolumeState.DOWN == VolumeState(b"down") + assert VolumeState.DOWN == VolumeState(b"foo") + + +def test_plex_state() -> None: + assert PlexState.UP == PlexState(b"up") + assert PlexState.INITIALIZING == PlexState(b"initializing") + assert PlexState.DEGRADED == PlexState(b"degraded") + assert PlexState.GROWABLE == PlexState(b"growable") + assert PlexState.DOWN == PlexState(b"down") + assert PlexState.DOWN == PlexState(b"foo") + + +def test_plex_org() -> None: + assert PlexOrg.CONCAT == PlexOrg(b"concat") + assert PlexOrg.STRIPED == PlexOrg(b"striped") + assert PlexOrg.RAID5 == PlexOrg(b"raid5") + assert PlexOrg.DISORG == PlexOrg(b"?") + assert PlexOrg.DISORG == PlexOrg(b"foo") + + +def test_sd_state() -> None: + assert SDState.UP == SDState(b"up") + assert SDState.INITIALIZING == SDState(b"initializing") + assert SDState.DEGRADED == SDState(b"degraded") + assert SDState.GROWABLE == SDState(b"growable") + assert SDState.DOWN == SDState(b"down") + assert SDState.DOWN == SDState(b"foo") + + +@pytest.mark.parametrize( + ("bytestr", "size"), + [ + (b"123", 123), + (b"123foo", 123), + (b"123b", 123 * 512), + (b"123bfoo", 123 * 512), + (b"123B", 123 * 512), + 
(b"123Bfoo", 123 * 512), + (b"123s", 123 * 512), + (b"123sfoo", 123 * 512), + (b"123S", 123 * 512), + (b"123Sfoo", 123 * 512), + (b"123k", 123 * 1024), + (b"123kfoo", 123 * 1024), + (b"123K", 123 * 1024), + (b"123Kfoo", 123 * 1024), + (b"123m", 123 * 1024 * 1024), + (b"123mfoo", 123 * 1024 * 1024), + (b"123M", 123 * 1024 * 1024), + (b"123Mfoo", 123 * 1024 * 1024), + (b"123g", 123 * 1024 * 1024 * 1024), + (b"123gfoo", 123 * 1024 * 1024 * 1024), + (b"123G", 123 * 1024 * 1024 * 1024), + (b"123Gfoo", 123 * 1024 * 1024 * 1024), + (b" 123", 0), + (b"foo", 0), + (b"", 0), + ], +) +def test__parse_size(bytestr: bytes, size: int) -> None: + assert _parse_size(bytestr) == size + + +@pytest.mark.parametrize( + ("tokens", "result", "logline"), + [ + ([b"myname"], Volume(timestamp=CONF_TS, name=b"myname"), ""), + ([b"myname", b"state", b"up"], Volume(timestamp=CONF_TS, name=b"myname", state=VolumeState(b"up")), ""), + ([b"state", b"up", b"myname"], Volume(timestamp=CONF_TS, name=b"myname", state=VolumeState(b"up")), ""), + ([b"myname", b"state"], None, "No value for token b'state', ignoring volume config"), + ([b"state", b"up"], None, "No name found for volume, ignoring volume config"), + ], +) +def test__parse_volume_config( + caplog: pytest.LogCaptureFixture, + tokens: list[bytes], + result: Volume | None, + logline: str, +) -> None: + caplog.set_level(logging.DEBUG) + log.setLevel(logging.DEBUG) + volume = _parse_volume_config(CONF_TS, iter(tokens)) + + assert volume == result + assert logline in caplog.text + + +@pytest.mark.parametrize( + ("tokens", "result", "logline"), + [ + ([b"name", b"myname"], Plex(timestamp=CONF_TS, name=b"myname"), ""), + ([b"name", b"myname", b"state", b"up"], Plex(timestamp=CONF_TS, name=b"myname", state=PlexState(b"up")), ""), + ([b"state", b"up", b"name", b"myname"], Plex(timestamp=CONF_TS, name=b"myname", state=PlexState(b"up")), ""), + ( + [b"name", b"myname", b"org", b"concat", b"vol", b"myvol", b"state", b"up"], + Plex( + timestamp=CONF_TS, 
+ name=b"myname", + org=PlexOrg(b"concat"), + volume=b"myvol", + state=PlexState(b"up"), + ), + "", + ), + ( + [b"name", b"myname", b"org", b"concat", b"volume", b"myvol", b"state", b"up"], + Plex( + timestamp=CONF_TS, + name=b"myname", + org=PlexOrg(b"concat"), + volume=b"myvol", + state=PlexState(b"up"), + ), + "", + ), + ( + [b"name", b"myname", b"org", b"raid5", b"123", b"volume", b"myvol", b"state", b"up"], + Plex( + timestamp=CONF_TS, + name=b"myname", + org=PlexOrg(b"raid5"), + stripesize=123, + volume=b"myvol", + state=PlexState(b"up"), + ), + "", + ), + ( + [b"name", b"myname", b"org", b"striped", b"123", b"volume", b"myvol", b"state", b"up"], + Plex( + timestamp=CONF_TS, + name=b"myname", + org=PlexOrg(b"striped"), + stripesize=123, + volume=b"myvol", + state=PlexState(b"up"), + ), + "", + ), + ( + [b"name", b"myname", b"org", b"striped", b"0"], + None, + "Invalid stripesize: 0, ignoring plex config", + ), + ( + [b"name", b"myname", b"org", b"striped", b"-123"], + None, + "Invalid stripesize: -123, ignoring plex config", + ), + ([b"name", b"myname", b"foo"], None, "Unknown token b'foo', ignoring plex config"), + ([b"name", b"myname", b"state"], None, "No value for token b'state', ignoring plex config"), + ], +) +def test__parse_plex_config( + caplog: pytest.LogCaptureFixture, + tokens: list[bytes], + result: Volume | None, + logline: str, +) -> None: + caplog.set_level(logging.DEBUG) + log.setLevel(logging.DEBUG) + plex = _parse_plex_config(CONF_TS, iter(tokens)) + + assert plex == result + assert logline in caplog.text + + +@pytest.mark.parametrize( + ("tokens", "result", "logline"), + [ + ([b"name", b"myname", b"drive", b"mydrive"], SD(timestamp=CONF_TS, drive=b"mydrive", name=b"myname"), ""), + ( + [b"name", b"myname", b"drive", b"mydrive", b"state", b"up"], + SD(timestamp=CONF_TS, drive=b"mydrive", name=b"myname", state=SDState(b"up")), + "", + ), + ( + [b"drive", b"mydrive", b"state", b"up", b"name", b"myname"], + SD(timestamp=CONF_TS, 
drive=b"mydrive", name=b"myname", state=SDState(b"up")), + "", + ), + ( + [ + b"name", + b"myname", + b"drive", + b"mydrive", + b"len", + b"123", + b"driveoffset", + b"135680", + b"plex", + b"myplex", + b"plexoffset", + b"123", + b"state", + b"up", + ], + SD( + timestamp=CONF_TS, + name=b"myname", + drive=b"mydrive", + length=123, + driveoffset=135680, + plex=b"myplex", + plexoffset=123, + state=SDState(b"up"), + ), + "", + ), + ( + [ + b"name", + b"myname", + b"drive", + b"mydrive", + b"length", + b"123", + b"driveoffset", + b"135680", + b"plex", + b"myplex", + b"plexoffset", + b"123", + b"state", + b"up", + ], + SD( + timestamp=CONF_TS, + name=b"myname", + drive=b"mydrive", + length=123, + driveoffset=135680, + plex=b"myplex", + plexoffset=123, + state=SDState(b"up"), + ), + "", + ), + ( + [b"drive", b"mydrive", b"length", b"-123"], + SD( + timestamp=CONF_TS, + drive=b"mydrive", + length=-1, + ), + "", + ), + ( + [b"drive", b"mydrive", b"driveoffset", b"0"], + SD( + timestamp=CONF_TS, + drive=b"mydrive", + driveoffset=0, + ), + "", + ), + ( + [b"drive", b"mydrive", b"driveoffset", b"123"], + None, + "Invalid driveoffset: 123, ignoring sd config", + ), + ( + [b"drive", b"mydrive", b"plexoffset", b"-123"], + None, + "Invalid plexoffset: -123, ignoring sd config", + ), + ([b"drive", b"mydrive", b"foo"], None, "Unknown token b'foo', ignoring sd config"), + ([b"drive", b"mydrive", b"state"], None, "No value for token b'state', ignoring sd config"), + ([b"state", b"up"], None, "No drive found for sd, ignoring sd config"), + ], +) +def test__parse_sd_config(caplog, tokens: list[bytes], result: Volume | None, logline: str) -> None: + caplog.set_level(logging.DEBUG) + log.setLevel(logging.DEBUG) + sd = _parse_sd_config(CONF_TS, iter(tokens)) + + assert sd == result + assert logline in caplog.text + + +@pytest.mark.parametrize( + ("bytestr", "idx", "result"), + [ + (b"abc", -1, b""), + (b"abc", 0, b"a"), + (b"abc", 1, b"b"), + (b"abc", 2, b"c"), + (b"abc", 3, b""), + ], +) 
+def test_get_char(bytestr: bytes, idx: int, result: bytes) -> None: + assert get_char(bytestr, idx) == result + + +@pytest.mark.parametrize( + ("line", "tokens"), + [ + (b"foo bar", [b"foo", b"bar"]), + (b"foo\tbar", [b"foo", b"bar"]), + (b" \t \t foo bar", [b"foo", b"bar"]), + (b"foo bar \t \t ", [b"foo", b"bar"]), + (b"foo bar#bla", [b"foo", b"bar#bla"]), + (b"foo bar #bla", [b"foo", b"bar"]), + (b"foo bar'", [b"foo", b"bar'"]), + (b'foo bar"', [b"foo", b'bar"']), + ], +) +def test_tokenize(line: bytes, tokens: list[bytes]) -> None: + assert list(tokenize(line)) == tokens + + +@pytest.mark.parametrize( + ("line", "idx"), + [ + (b"foo 'bar", 4), + (b'foo "bar', 5), + ], +) +def test_tokenize_raises(line: bytes, idx: int) -> None: + with pytest.raises(TokenizeError, match=f"Found quoted token at index {idx}"): + list(tokenize(line)) + + +def gen_vps(vps_cls: Volume | Plex | SD, arg_name: str, count: int = 0) -> iter[Volume | Plex | SD]: + vps_name = vps_cls.__name__.lower() + + def vps_iter() -> Volume | Plex | SD: + idx = 0 + done = False + + kwargs = {} + while not done: + kwargs[arg_name] = f"{vps_name}{idx}".encode() + yield vps_cls(timestamp=CONF_TS, **kwargs) + idx += 1 + + if idx == count: + done = True + + return vps_iter() + + +@pytest.mark.parametrize( + ("config", "expected_config", "expected_logs"), + [ + ( + b"volume\nplex\nsd", + { + "volumes": list(gen_vps(Volume, "name", 1)), + "plexes": list(gen_vps(Plex, "name", 1)), + "sds": list(gen_vps(SD, "drive", 1)), + }, + None, + ), + ( + b"volume\x00plex\x00sd", + { + "volumes": list(gen_vps(Volume, "name", 1)), + "plexes": list(gen_vps(Plex, "name", 1)), + "sds": list(gen_vps(SD, "drive", 1)), + }, + None, + ), + ( + b"volume\nplex\x00sd\n", + { + "volumes": list(gen_vps(Volume, "name", 1)), + "plexes": list(gen_vps(Plex, "name", 1)), + "sds": list(gen_vps(SD, "drive", 1)), + }, + None, + ), + ( + b"volume\nvolume\x00plex\nplex\x00\nplex\x00\x00sd\00sd\x00\n\n\x00sd\n\x00\n", + { + "volumes": 
list(gen_vps(Volume, "name", 2)), + "plexes": list(gen_vps(Plex, "name", 3)), + "sds": list(gen_vps(SD, "drive", 3)), + }, + None, + ), + ( + b"volume\x00foo\nplex\x00sd", + { + "volumes": list(gen_vps(Volume, "name", 1)), + "plexes": list(), + "sds": list(), + }, + "Invalid config line b'foo'", + ), + ], +) +def test_parse_vinum_config( + caplog: pytest.LogCaptureFixture, + config: bytes, + expected_config: VinumConfigs, + expected_logs: None | str, +) -> None: + caplog.set_level(logging.DEBUG) + log.setLevel(logging.DEBUG) + + volumes = gen_vps(Volume, "name") + plexes = gen_vps(Plex, "name") + sds = gen_vps(SD, "drive") + + with ( + patch("dissect.volume.vinum.config._parse_volume_config", autospec=True, side_effect=volumes), + patch("dissect.volume.vinum.config._parse_plex_config", autospec=True, side_effect=plexes), + patch("dissect.volume.vinum.config._parse_sd_config", autospec=True, side_effect=sds), + ): + config_data = parse_vinum_config(CONF_TS, config) + + assert config_data == expected_config + if expected_logs is not None: + assert expected_logs in caplog.text + + +def test_parse_vinum_config_token_error(caplog: pytest.LogCaptureFixture) -> None: + caplog.set_level(logging.DEBUG) + log.setLevel(logging.DEBUG) + + with patch("dissect.volume.vinum.config.tokenize", autospec=True, side_effect=TokenizeError("Oops!")): + parse_vinum_config(CONF_TS, b"b0rk\nb1rk\nb3rk") + + assert "Invalid config line b'b0rk': Oops!" in caplog.text