# Source code for irradiapy.io.xyzreader

"""This module contains the `XYZReader` class."""

import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Generator, TextIO

import numpy as np
import numpy.typing as npt



# [docs]
@dataclass
class XYZReader:
    """A class to read data from an extended XYZ file.

    The file is a sequence of subfiles (frames). Each frame consists of a line
    with the number of atoms, a comment line containing a
    ``Properties=name:type:count:...`` specification, and one line per atom.
    Iterating over the reader yields one structured array per frame.

    Attributes
    ----------
    file_path : Path
        The path to the XYZ file.
    encoding : str
        The text encoding used to open the file. By default, ``"utf-8"``.
    dtype : npt.DTypeLike
        The data type of the properties in the file. By default, it is set to
        `None`; it is updated with the dtype of each frame as frames are read.
    file : TextIO
        The open file handle. It is opened when the reader is created and
        closed once an iteration reaches the end of the file.
    """

    file_path: Path
    encoding: str = "utf-8"
    dtype: npt.DTypeLike = field(default=None, init=False)
    file: TextIO = field(default=None, init=False)

    def __post_init__(self) -> None:
        # Opened eagerly so that a missing or unreadable file fails at
        # construction time rather than on the first iteration.
        self.file = self.file_path.open(encoding=self.encoding)

    def __del__(self) -> None:
        if self.file is not None:
            self.file.close()

    def __iter__(self) -> Generator[npt.NDArray, None, None]:
        """Iterate over subfiles in the XYZ file.

        Reading continues from the current position of the file handle, so an
        iteration that was abandoned early resumes at the next frame. Once the
        end of the file has been reached the handle is closed; a later
        iteration reopens the file and starts again from the first frame.

        Yields
        ------
        npt.NDArray
            Array of atom data for each subfile. String properties are stored
            with a unicode width that fits the longest value in that frame.

        Raises
        ------
        ValueError
            If the atom-count line is not an integer or the comment line has
            no ``Properties=`` specification.
        """
        # A completed pass closes the handle; reopen so the reader is reusable.
        if self.file is None or self.file.closed:
            self.file = self.file_path.open(encoding=self.encoding)
        while True:
            line = self.file.readline()
            if not line:
                break
            if not line.strip():
                # Blank separator or trailing newline where a count is expected.
                continue
            natoms = int(line)
            comment = self.file.readline()
            name_props, type_props, multiplicity_props, dtype = self.__get_properties(
                comment
            )
            if str in type_props:
                # A bare ``str`` field has zero width inside a structured
                # dtype and would silently store empty strings, so the frame
                # is buffered to measure the width each string field needs.
                lines = [self.file.readline() for _ in range(natoms)]
                dtype = self.__size_string_fields(
                    lines, name_props, type_props, multiplicity_props
                )
            else:
                # Purely numeric frames are streamed without buffering.
                lines = (self.file.readline() for _ in range(natoms))
            self.dtype = dtype
            atoms = np.empty(natoms, dtype=dtype)
            for i, atom_line in enumerate(lines):
                atoms[i] = self.__line_to_data(
                    atom_line, name_props, type_props, multiplicity_props, dtype
                )
            yield atoms
        self.file.close()

    def __get_properties(
        self, comment: str
    ) -> tuple[list[str], list[type], list[int], np.dtype]:
        """Sets properties using the comment line.

        Parameters
        ----------
        comment : str
            Comment line.

        Returns
        -------
        tuple[list[str], list[type], list[int], np.dtype]
            Properties names, types, multiplicities, and dtype. String
            properties appear in the dtype as plain ``str`` fields; the
            iterator replaces them with fixed-width fields once the data of
            the frame is known.

        Raises
        ------
        ValueError
            If the comment line has no ``Properties=`` specification.
        TypeError
            If a property uses an unknown type code.
        """
        match = re.search(r"Properties=([^ \n]+)", comment)
        if not match:
            raise ValueError("Missing or invalid comment line format.")
        properties = match.group(1).split(":")
        # The specification is a flat list of (name, type, count) triplets;
        # an incomplete trailing triplet is ignored.
        num_properties = len(properties) // 3
        name_props = [properties[i * 3] for i in range(num_properties)]
        type_props = [
            self.__map_type(properties[i * 3 + 1]) for i in range(num_properties)
        ]
        multiplicity_props = [int(properties[i * 3 + 2]) for i in range(num_properties)]
        dtype = np.dtype(
            [
                # Scalar properties get a plain field, the rest a sub-array.
                (name, kind) if count == 1 else (name, kind, count)
                for name, kind, count in zip(
                    name_props, type_props, multiplicity_props
                )
            ]
        )
        return name_props, type_props, multiplicity_props, dtype

    def __size_string_fields(
        self,
        lines: list[str],
        name_props: list[str],
        type_props: list[type],
        multiplicity_props: list[int],
    ) -> np.dtype:
        """Builds the frame dtype with string fields wide enough for the data.

        Parameters
        ----------
        lines : list[str]
            Atom lines of the frame.
        name_props : list[str]
            Names of the properties.
        type_props : list[type]
            Types of the properties.
        multiplicity_props : list[int]
            Multiplicities of the properties.

        Returns
        -------
        np.dtype
            Structured dtype where each string property is a unicode field
            whose width is the longest value found in `lines` (at least 1).
        """
        # Column span occupied by every string property.
        spans = {}
        col = 0
        for name, kind, count in zip(name_props, type_props, multiplicity_props):
            if kind is str:
                spans[name] = slice(col, col + count)
            col += count
        widths = dict.fromkeys(spans, 1)
        for line in lines:
            tokens = line.split()
            for name, span in spans.items():
                for token in tokens[span]:
                    if len(token) > widths[name]:
                        widths[name] = len(token)
        fields = []
        for name, kind, count in zip(name_props, type_props, multiplicity_props):
            spec = f"U{widths[name]}" if kind is str else kind
            fields.append((name, spec) if count == 1 else (name, spec, count))
        return np.dtype(fields)

    def __map_type(self, type_str: str) -> type:
        """Maps type string to Python type.

        Parameters
        ----------
        type_str : str
            Type string: ``"S"`` (string), ``"I"`` (integer) or ``"R"`` (real).

        Returns
        -------
        type
            The Python type used to convert values of that property.

        Raises
        ------
        TypeError
            If `type_str` is not one of the known codes.
        """
        if type_str == "S":
            return str
        if type_str == "I":
            return int
        if type_str == "R":
            return float
        raise TypeError(f"Unexpected type string: {type_str}")

    def __line_to_data(
        self,
        line: str,
        name_props: list[str],
        type_props: list[type],
        multiplicity_props: list[int],
        dtype: np.dtype,
    ) -> npt.ArrayLike:
        """Turns one line of data into a numpy array.

        Parameters
        ----------
        line : str
            Line containing the data.
        name_props : list[str]
            Names of the properties.
        type_props : list[type]
            Types of the properties.
        multiplicity_props : list[int]
            Multiplicities of the properties.
        dtype : np.dtype
            Data type of the properties.

        Returns
        -------
        npt.ArrayLike
            The data in the line.

        Raises
        ------
        IndexError
            If the line has fewer columns than the properties require.
        """
        # A one-element structured array lets each field be filled by name.
        output = np.empty(1, dtype=dtype)
        data = line.split()
        col = 0
        for name_prop, type_prop, multiplicity_prop in zip(
            name_props, type_props, multiplicity_props
        ):
            if multiplicity_prop == 1:
                output[name_prop] = type_prop(data[col])
            else:
                output[name_prop] = [
                    type_prop(data[col + j]) for j in range(multiplicity_prop)
                ]
            # Advance past the columns consumed by this property.
            col += multiplicity_prop
        return output[0]