Source code for hermespy.core.factory

# -*- coding: utf-8 -*-
"""
=====================
Serialization Factory
=====================

This module implements the main interface for loading / dumping HermesPy configurations from / to `YAML`_ files.
Every mutable object that is expected to have its state represented as a text-section within configuration files
must inherit from the :class:`.Serializable` base class.

All :class:`.Serializable` classes within the `hermespy` namespace are detected automatically by the :class:`.Factory`
managing the serialization process.
As a result, dumping any :class:`.Serializable` object state to a `.yml` text file is as easy as

.. code-block:: python

   factory = Factory()
   factory.to_file("dump.yml", serializable)

and can be loaded again just as easily via

.. code-block::  python

        factory = Factory()
        serializable = factory.from_file("dump.yml")

from any context.


.. _YAML: https://yaml.org/
"""

from __future__ import annotations

import re
from abc import ABCMeta, abstractmethod
from collections.abc import Iterable
from enum import Enum
from inspect import getmembers, isclass, signature
from importlib import import_module
from io import TextIOBase, StringIO
import os
from pkgutil import iter_modules
from re import compile, Pattern, Match
from typing import (
    Any,
    Dict,
    Set,
    Sequence,
    Mapping,
    Union,
    KeysView,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    ValuesView,
)

import numpy as np
from h5py import Group
from ruamel.yaml import (
    YAML,
    SafeConstructor,
    SafeRepresenter,
    ScalarNode,
    Node,
    MappingNode,
    SequenceNode,
)
from ruamel.yaml.constructor import ConstructorError

import hermespy
from .logarithmic import Logarithmic, LogarithmicSequence

__author__ = "Jan Adler"
__copyright__ = "Copyright 2024, Barkhausen Institut gGmbH"
__credits__ = ["Jan Adler"]
__license__ = "AGPLv3"
__version__ = "1.3.0"
__maintainer__ = "Jan Adler"
__email__ = "jan.adler@barkhauseninstitut.org"
__status__ = "Prototype"


SerializableType = TypeVar("SerializableType", bound="Serializable")
"""Type of Serializable Class."""


[docs] class Serializable(object): """Base class for serializable classes. Only classes inheriting from `Serializable` will be serialized by the factory. """ yaml_tag: Optional[str] = None """YAML serialization tag. :meta private: """ property_blacklist: Set[str] = set() """Set of properties to be ignored during serialization. :meta private: """ serialized_attributes: Set[str] = set() """Set of object attributes to be serialized. :meta private: """ @staticmethod def _arg_signature() -> Set[str]: """Argument signature. Returns: Additional arguments not inferable from the init signature. :meta private: """ return set() @classmethod def _serializable_attributes( cls: Type[Serializable], blacklist: Optional[Set[str]] = None ) -> Set[str]: """Extract the set of serializable class attributes. Args: cls (Type[Serializable]): Class of the object to be serialized. blacklist (Set[str], optional): List of attribute names to be ignored during extraction. Returns: Set of serializable attribute names. :meta private: """ if blacklist: blacklist = blacklist.copy() blacklist.update(cls.property_blacklist) else: blacklist = cls.property_blacklist # Extract initialization signature init_signature = set(signature(cls.__init__).parameters.keys()) # Query serializable properties attributes = set() for attribute_key, attribute_type in getmembers(cls): # Prevent the access to protected or private attributes if attribute_key.startswith("_"): continue # Only add attribute if it isn't blacklisted if attribute_key in blacklist: continue # Make sure the attribute is a property if not isinstance(attribute_type, property): continue # Don't serialize if the property isn't settable if attribute_type.fset is None and attribute_key not in init_signature: continue attributes.add(attribute_key) # Add forced attributes attributes.update(cls.serialized_attributes) return attributes @classmethod def to_yaml( cls: Type[SerializableType], representer: SafeRepresenter, node: SerializableType ) -> Node: """Serialize a serializable object to YAML. Args: representer (SafeRepresenter): A handle to a representer used to generate valid YAML code. The representer gets passed down the serialization tree to each node. node (Serializable): The channel instance to be serialized. Returns: The serialized YAML node. :meta private: """ return node._mapping_serialization_wrapper(representer) def _mapping_serialization_wrapper( self, representer: SafeRepresenter, blacklist: Optional[Set[str]] = None, additional_fields: Optional[Dict[str, Any]] = None, ) -> MappingNode: """Conveniently serializes the class to a YAML mapping node. Args: blacklist (Set[str], optional): Properties to be ignored during serialization. additional_fields (Dict[str, Any], optional): Additional fields to be serialized. Returns: A YAML mapping node representing this object. :meta private: """ # Init additional fields additional_fields = additional_fields if additional_fields else {} # Query serializable properties serializable_atrributes = self._serializable_attributes(blacklist) # Construct state dictionary by querying serializable attributes state: Dict[str, Any] = {} for attribute_key in serializable_atrributes: attribute_value = getattr(self, attribute_key) # Don't serialize attribute if it is None if attribute_value is None: continue state[attribute_key] = attribute_value # Add additional fields to state if additional_fields: state.update(additional_fields) # Create YAML mapping return representer.represent_mapping(self.yaml_tag, state) @classmethod def from_yaml( cls: Type[SerializableType], constructor: SafeConstructor, node: Node ) -> SerializableType: """Recall a new serializable class instance from YAML. Args: constructor (SafeConstructor): A handle to the constructor extracting the YAML information. node (Node): YAML node representing the `Serializable` serialization. Returns: The de-serialized object. :meta private: """ # Handle empty yaml nodes if isinstance(node, ScalarNode): return cls() return cls.InitializationWrapper(constructor.construct_mapping(node, deep=True)) @classmethod def InitializationWrapper( cls: Type[SerializableType], configuration: Dict[str, Any] ) -> SerializableType: """Conveniently initializes serializable classes. Args: configuration (Dict[str, Any]): Configuration parameter dictionary. Returns: SerializableArray: Initialized class instance. :meta private: """ # Extract initialization signature init_signature = list(signature(cls.__init__).parameters.keys()) arg_signature = cls._arg_signature() init_signature.remove("self") # Extract settable class properties properties = cls._serializable_attributes() init_parameters: Dict[str, Any] = {} init_properties: Dict[str, Any] = {} for configuration_key in list(configuration.keys()): if configuration_key in init_signature or configuration_key in arg_signature: init_parameters[configuration_key] = configuration.pop(configuration_key) continue lower_key = configuration_key.lower() if lower_key in init_signature or lower_key in arg_signature: # pragma: no cover init_parameters[lower_key] = configuration.pop(configuration_key) continue if configuration_key in properties: init_properties[configuration_key] = configuration.pop(configuration_key) continue if lower_key in properties: # pragma: no cover init_properties[lower_key] = configuration.pop(configuration_key) continue # Initialize class # Remaining configuration fields get treated as kwargs init_parameters.update(configuration) try: instance = cls(**init_parameters) except TypeError as e: raise TypeError(f"Error while attempting to initialize '{cls.__name__}', {str(e)}") # Configure properties for property_name, property_value in init_properties.items(): try: setattr(instance, property_name, property_value) except AttributeError as e: raise AttributeError( f"Error while attempting to configure '{property_name}', {str(e)}" ) # Return configured class instance return instance
SET = TypeVar("SET", bound="SerializableEnum") """Type of serializable enumeration."""
[docs] class SerializableEnum(Serializable, Enum): """Base class for serializable enumerations."""
[docs] @classmethod def from_parameters(cls: Type[SET], enum: SET | int | str) -> SET: """Initialize enumeration from multiple parameters. Args: enum (SET | int | str): The parameter from which the enum should be initialized. Returns: The initialized enumeration. """ if isinstance(enum, cls): return enum elif isinstance(enum, int): return cls(enum) elif isinstance(enum, str): return cls[enum] else: raise ValueError("Unknown serializable enumeration type")
@classmethod def from_yaml(cls: Type[SerializableEnum], _: SafeConstructor, node: Node) -> SerializableEnum: # Convert scalar string representation back to enum return cls[node.value] @classmethod def to_yaml( cls: Type[SerializableEnum], representer: SafeRepresenter, node: SerializableEnum ) -> ScalarNode: # Convert enum to scalar string representation return representer.represent_scalar(cls.yaml_tag, "{.name}".format(node)) @classmethod # type: ignore @property def yaml_tag(cls) -> str: # type: ignore return cls.__name__
[docs] class Factory: """Helper class to load HermesPy simulation scenarios from YAML configuration files.""" extensions: Set[str] = {".yml", ".yaml", ".cfg"} """List of recognized filename extensions for serialization files.""" __yaml: YAML __clean: bool __db_regex: Pattern __tag_registry: Mapping[str, Type[Serializable]] def __init__(self) -> None: # YAML dumper configuration self.__yaml = YAML(typ="safe", pure=True) self.__yaml.default_flow_style = False self.__yaml.compact(seq_seq=False, seq_map=False) self.__yaml.encoding = None self.__yaml.indent(mapping=4, sequence=4, offset=2) self.__clean = True self.__tag_registry = {} # Add custom representers self.__yaml.representer.add_representer(complex, Factory.__complex_representer) self.__yaml.representer.add_representer(np.ndarray, Factory.__array_representer) self.__yaml.representer.add_representer(np.float_, Factory.__numpy_float_representer) # Add custom constructors self.__yaml.constructor.add_constructor("complex", Factory.__complex_constructor) self.__yaml.constructor.add_constructor("array", Factory.__array_constructor) self.__yaml.constructor.add_constructor("dB", Factory.__logarithmic_constructor) # Iterate over all modules within the hermespy namespace # Scan for serializable classes lookup_paths = list(hermespy.__path__) + [ os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) ] for _, name, is_module in iter_modules(lookup_paths, hermespy.__name__ + "."): if not is_module: continue # pragma: no cover module = import_module(name) for _, serializable_class in getmembers(module): if not isclass(serializable_class) or not issubclass( serializable_class, Serializable ): continue # Register serializable class at the YAML factory self.__yaml.register_class(serializable_class) # Remember tag for tagged classes if serializable_class.yaml_tag is not None: self.__tag_registry[serializable_class.yaml_tag] = serializable_class # Construct regular expressions for purging self.__range_regex = compile( r"([0-9.e-]*)[ ]*,[ ]*([0-9.e-]*)[ ]*,[ ]*\.\.\.[ ]*,[ ]*([0-9.e-]*)" ) self.__db_regex = compile(r"\[([ 0-9.,-]*)\][ ]*dB") @property def clean(self) -> bool: """Use clean YAML standard. Disabling the clean flag will deactivate additional text processing to the YAML configuration files done by Hermes, such as dB conversion or linear number spaces. Returns: Clean flag. """ return self.__clean @clean.setter def clean(self, flag: bool) -> None: self.__clean = flag @property def registered_classes(self) -> ValuesView[Type[Serializable]]: """Classes registered for serialization within the factory.""" return self.__tag_registry.values() @property def registered_tags(self) -> KeysView[str]: """Read registered YAML tags.""" return self.__tag_registry.keys() @property def tag_registry(self) -> Mapping[str, Type[Serializable]]: """Read registered YAML tags.""" return self.__tag_registry @staticmethod def __complex_representer(representer: SafeRepresenter, value: complex) -> ScalarNode: """Represent complex numbers as strings. Args: representer (SafeRepresenter): YAML representer. value (complex): The complex number to be transformed to a string. Returns: Scalar yaml node. """ complex_string = str(value)[1:-1] return representer.represent_scalar("complex", complex_string) @staticmethod def __complex_constructor(constructor: SafeConstructor, node: ScalarNode) -> complex: """Construct a complex number from YAML. Args: constructor (SafeConstructor): YAML constructor. node (ScalarNode): The YAML node representing the complex number. Returns: A complex number. """ complex_number = complex(constructor.construct_scalar(node)) return complex_number @staticmethod def __array_representer(representer: SafeRepresenter, array: np.ndarray) -> SequenceNode: """Represent numpy arrays as lists. Args: representer (SafeRepresenter): YAML representer. array (np.ndarray): The array to be transformed to a sequence. Returns: Sequence yaml node. """ # Transform complex numpy arrays to their string representation if array.dtype in [np.complex64, np.complex128]: object_array = np.empty(array.shape, dtype=object) for index, number in np.ndenumerate(array): object_array[index] = str(number).replace("(", "").replace(")", "") list = object_array.tolist() else: list = array.tolist() sequence = representer.represent_sequence("array", list, flow_style=True) return sequence @staticmethod def __numpy_float_representer(representer: SafeRepresenter, value: np.float_) -> ScalarNode: """Represent numy floating point scalar numbers as strings. Args: representer (SafeRepresenter): YAML representer. value (np.float_): The number to be transformed to a string. Returns: Scalar yaml node. """ return representer.represent_float(float(value)) @staticmethod def __array_constructor(constructor: SafeConstructor, node: SequenceNode) -> np.ndarray: """Construct a numpy array from YAML. Args: constructor (SafeConstructor): YAML constructor. node (ScalarNode): The YAML node representing the array. Returns: A numpy array. """ if isinstance(node, SequenceNode): return np.array([Factory.__array_constructor(constructor, n) for n in node.value]) if "j" in node.value: return Factory.__complex_constructor(constructor, node) else: return constructor.construct_object(node) @staticmethod def __logarithmic_constructor( constructor: SafeConstructor, node: Union[ScalarNode, SequenceNode] ) -> Union[Logarithmic, LogarithmicSequence]: """Construct a logarithmic value or sequence from YAML. Args: constructor (SafeConstructor): YAML constructor. node (Union[ScalarNode, SequenceNode]): The YAML node representing the array. Returns: A logarithmic representation. """ if isinstance(node, ScalarNode): return Logarithmic(float(constructor.construct_scalar(node))) if isinstance(node, SequenceNode): return LogarithmicSequence(constructor.construct_sequence(node)) @staticmethod def __decibel_conversion(match: re.Match) -> str: """Convert YAML sequences with dB annotations to tagged sequences. Args: match (re.Match): The serialization sequence to be converted. Returns: str: The purged sequence. """ linear_values = [float(str_rep) for str_rep in match[1].replace(" ", "").split(",")] string_replacement = "!<dB> [" for linear_value in linear_values: string_replacement += str(linear_value) + ", " string_replacement += "]" return string_replacement
[docs] def from_path(self, paths: Union[str, Set[str]]) -> Sequence[Any]: """Load a configuration from an arbitrary file system path. Args: paths (Union[str, Set[str]]): Paths to a file or a folder featuring .yml config files. Returns: Serializable objects recalled from `paths`. Raises: ValueError: If the provided `path` does not exist on the filesystem. """ # Convert single path to a set if required if isinstance(paths, str): paths = {paths} hermes_objects = [] for path in paths: if not os.path.exists(path): raise ValueError(f"Lookup path '{path}' not found") if os.path.isdir(path): deserialization = self.from_folder(path) else: deserialization = self.from_file(path) if isinstance(deserialization, list): hermes_objects += deserialization else: hermes_objects.append(deserialization) # pragma: no cover return hermes_objects
[docs] def from_folder( self, path: str, recurse: bool = True, follow_links: bool = False ) -> Sequence[Any] | Any: """Load a configuration from a folder. Args: path (str): Path to the folder configuration. recurse (bool, optional): Recurse into sub-folders within `path`. follow_links (bool, optional): Follow links within `path`. Returns: Serializable objects recalled from `path`. Raises: ValueError: If `path` is not a directory. """ if not os.path.exists(path): raise ValueError("Lookup path '{}' not found".format(path)) if not os.path.isdir(path): raise ValueError("Lookup path '{}' is not a directory".format(path)) hermes_objects: List[Any] = [] for directory, _, files in os.walk(path, followlinks=follow_links): for file in files: _, extension = os.path.splitext(file) if extension in self.extensions: deserialization = self.from_file(os.path.join(directory, file)) hermes_objects += ( deserialization if isinstance(deserialization, list) else [deserialization] ) if not recurse: break return hermes_objects
[docs] def to_folder(self, path: str, *args: Any) -> None: """Dump a configuration to a folder. Args: path (str): Path to the folder configuration. *args (Any): Configuration objects to be dumped. """ pass # pragma: no cover
[docs] def from_str(self, config: str) -> Sequence[Any] | Any: """Load a configuration from a string object. Args: config (str): The configuration to be loaded. Returns: List of objects or object from `config`. """ stream = StringIO(config) return self.from_stream(stream)
[docs] def to_str(self, *args: Any) -> str: """Dump a configuration to a folder. Args: *args (Any): Configuration objects to be dumped. Returns: str: String containing full YAML configuration. Raises: RepresenterError: If objects in ``*args`` are unregistered classes. """ stream = StringIO() self.to_stream(stream, args) return stream.getvalue()
[docs] def from_file(self, file: str) -> Sequence[Any] | Any: """Load a configuration from a single YAML file. Args: file (str): Path to the folder configuration. Returns: Serialized objects within `path`. """ with open(file, mode="r") as file_stream: try: return self.from_stream(file_stream) # Re-raise constructor errors with the correct file name except ConstructorError as constructor_error: constructor_error.problem_mark.name = file raise constructor_error
[docs] def to_file(self, path: str, *args: Any) -> None: """Dump a configuration to a single YML file. Args: path (str): Path to the configuration file. *args (Any): Configuration objects to be dumped. Raises: RepresenterError: If objects in ``*args`` are unregistered classes. """ pass # pragma: no cover
@staticmethod def __range_restore_callback(m: Match) -> str: """Internal regular expression callback. Args: m (Match): Regular expression match. Returns: str: The processed match line. """ # Extract range parameters start = float(m.group(1)) step = float(m.group(2)) - start stop = float(m.group(3)) + step range = np.arange(start=start, stop=stop, step=step) replacement = "" for step in range[:-1]: replacement += str(step) + ", " replacement += str(range[-1]) return replacement
[docs] def from_stream(self, stream: TextIOBase) -> Sequence[Any] | Any: """Load a configuration from an arbitrary text stream. Args: stream (TextIOBase): Text stream containing the configuration. Returns: List of deserialized objects or object within `stream`. Raises: ConstructorError: If YAML parsing fails. """ if not self.__clean: return self.__yaml.load(stream) clean_stream = "" for line in stream.readlines(): clean_line = self.__range_regex.sub(self.__range_restore_callback, line) clean_line = self.__db_regex.sub(self.__decibel_conversion, clean_line) clean_stream += clean_line hermes_objects = self.__yaml.load(StringIO(clean_stream)) # If the deserialization is empty, return an empty list if hermes_objects is None: return [] # If the deserialization is a single item, return just the item if isinstance(hermes_objects, Sequence) and len(hermes_objects) == 1: return hermes_objects[0] return hermes_objects
[docs] def to_stream(self, stream: TextIOBase, *args: Iterable[Any]) -> None: """Dump a configuration to an arbitrary text stream. Args: stream (TextIOBase): Text stream to the configuration. *args (Any): Configuration objects to be dumped. Raises: RepresenterError: If objects in ``*args`` are unregistered classes. """ for serializable_object in args: self.__yaml.dump(serializable_object, stream)
HDFSerializableType = TypeVar("HDFSerializableType", bound="HDFSerializable") """Type of HDF Serializable Class"""
[docs] class HDFSerializable(metaclass=ABCMeta): """Base class for object serializable to the HDF5 format. Structures are serialized to HDF5 files by the :meth:`to_HDF<HDFSerializable.to_HDF>` routine and de-serialized by the :meth:`from_HDF<HDFSerializable.from_HDF>` method, respectively. """ @abstractmethod def to_HDF(self, group: Group) -> None: """Serialize the object state to HDF5. Dumps the object's state and additional information to a HDF5 group. Args: group (h5py.Group): The HDF5 group to which the object is serialized. :meta private: """ ... # pragma no cover @classmethod @abstractmethod def from_HDF(cls: Type[HDFSerializableType], group: Group) -> HDFSerializableType: """De-Serialized the object state from HDF5. Recalls the object's state from a HDF5 group. Args: group (h5py.Group): The HDF5 group from which the object state is recalled. Returns: The object initialized from the HDF5 group state. :meta private: """ ... # pragma no cover @staticmethod def _create_group(group: Group, name: str) -> Group: """Create an HDF5 group if it does not exist yet. Args: group (h5py.Group): The HDF5 group from which the object state is recalled. name (str): Name of the group to be created. Returns: A handle to group `name`. :meta private: """ if name not in group: return group.create_group(name) else: return group[name] @staticmethod def _write_dataset(group: Group, dataset: str, data: Any | None) -> None: """Write to a dataset. Args: group (h5py.Group): The HDF5 group from which the object state is recalled. dataset (str): The dataset name. data (Any | None): The data to be written to `dataset`. :meta private: """ if dataset in group: del group[dataset] group.create_dataset(dataset, data=data) @staticmethod def _range_to_HDF(group: Group, id: str, value: float | Tuple[float, float]) -> None: """Serialize a range variable to HDF5. Args: group (h5py.Group): The HDF5 group to which the range value is serialized. id (str): Identifier string of the range value. value (float | Tuple[float, float]): The range value to be serialized. Can either be a scalar or a tuple of two values indicating maximum and minimum. """ if isinstance(value, tuple): group.attrs[id + "_min"] = value[0] group.attrs[id + "_max"] = value[1] else: group.attrs[id] = value @staticmethod def _range_from_HDF(group: Group, id: str) -> float | Tuple[float, float]: """Deserialize a range variable from HDF5. Args: group (h5py.Group): The HDF5 group from which the range value is deserialized. id (str): Identifier string of the range value. Returns: The deserialized range value. Can either be a scalar or a tuple of two values indicating maximum and minimum. """ if id in group.attrs: return float(group.attrs[id]) else: return (float(group.attrs[id + "_min"]), float(group.attrs[id + "_max"]))