"""Shared utilities for the Bruker BES3T and ESP loaders.
Implements parameter-file parsing (.DSC / .par), binary matrix reading
from .DTA / .spc payloads, and directory listing for Bruker file sets.
"""
import re
import warnings
from pathlib import Path
from typing import List, Union
import numpy as np
# Matches a complete decimal literal and nothing else: an optional sign, then
# either digits with an optional fractional part ("12", "12.", "12.5") or a
# leading-dot fraction (".5"), followed by an optional exponent ("e-3", "E+10").
# parse_field_params uses it to decide whether a string may be handed to
# float(); words that float() would also accept, such as "nan" or "inf", do
# not match and therefore stay strings.
_NUMBER_RE = re.compile(r"^[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$")
[docs]
def read_par_file(par_file_path: Path) -> dict:
    """Read a Bruker ESP/WinEPR ``.par`` parameter file into a dictionary.

    Every non-blank line is split at the first run of whitespace into a key
    and a value. Lines whose first token does not start with a letter are
    skipped, and a value wrapped in single quotes has the quotes removed.
    Values are always kept as strings; a key without a value maps to ``""``.
    When a key occurs more than once, the last occurrence wins.

    After parsing, generic axis aliases are added for the ESP keys that hold
    a non-empty value:

    * ``JEX``  -> ``XAXIS_NAME``
    * ``JUN``  -> ``XAXIS_UNIT`` (replaced by ``XXUN`` when that is set too)
    * ``JEY``  -> ``YAXIS_NAME``
    * ``XYUN`` -> ``YAXIS_UNIT``

    Parameters
    ----------
    par_file_path : pathlib.Path or str
        Location of the ``.par`` file. Plain strings and other path-like
        objects are accepted and converted with ``Path``.

    Returns
    -------
    dict
        Parameter names mapped to their string values, plus the axis aliases
        described above.

    Raises
    ------
    FileNotFoundError
        If ``par_file_path`` does not point to an existing file.
    IOError
        If the file exists but cannot be opened or read.
    """
    # Accept str / os.PathLike as well, like BrukerListFiles does.
    par_file_path = Path(par_file_path)
    if not par_file_path.is_file():
        raise FileNotFoundError(f"Cannot find the parameter file {par_file_path}")

    parameters = {}
    try:
        # latin-1 maps every byte to a character, so decoding cannot fail;
        # the only errors left to guard against are OS-level I/O errors.
        with open(par_file_path, "r", encoding="latin-1") as f:
            for raw_line in f:
                # split() ignores leading whitespace and yields [] for a
                # blank line, so no separate strip/emptiness check is needed.
                parts = raw_line.split(maxsplit=1)
                if not parts:
                    continue
                key = parts[0]
                # Only tokens starting with a letter are treated as keys.
                if not key[0].isalpha():
                    continue
                value = parts[1].strip() if len(parts) > 1 else ""
                # Drop one pair of surrounding single quotes, if present.
                if len(value) >= 2 and value.startswith("'") and value.endswith("'"):
                    value = value[1:-1]
                parameters[key] = value
    except OSError as e:
        raise IOError(f"Error reading PAR file {par_file_path}: {e}") from e

    # Order matters: XXUN comes after JUN so that it overrides the unit.
    alias_rules = (
        ("JEX", "XAXIS_NAME"),
        ("JUN", "XAXIS_UNIT"),
        ("XXUN", "XAXIS_UNIT"),
        ("JEY", "YAXIS_NAME"),
        ("XYUN", "YAXIS_UNIT"),
    )
    for source_key, alias_key in alias_rules:
        if parameters.get(source_key):
            parameters[alias_key] = parameters[source_key]
    return parameters
[docs]
def read_dsc_file(dsc_file_path: Path) -> dict:
    """Read a Bruker BES3T ``.DSC`` descriptor file into a dictionary.

    The file is processed in two passes:

    1. Physical lines are stripped and joined into logical lines: a line
       ending in a backslash is continued by the next line. The two-character
       escape ``\\n`` inside a logical line is replaced by a real newline.
    2. Each logical line is split at the first run of whitespace into a key
       and a value. Parsing stops at the ``#MHL`` (Manipulation History
       Layer) marker. Lines whose first token does not start with a letter
       are skipped, and a value wrapped in single quotes has the quotes
       removed. Values are kept as strings; a key without a value maps to
       ``""``.

    Generic axis aliases are then added: a non-empty ``XNAM`` / ``YNAM`` is
    copied to ``XAXIS_NAME`` / ``YAXIS_NAME``, and the matching ``XUNI`` /
    ``YUNI`` is copied to ``XAXIS_UNIT`` / ``YAXIS_UNIT`` when the descriptor
    defines it. A descriptor that names an axis without giving its unit
    simply gets no ``*AXIS_UNIT`` entry instead of failing.

    Parameters
    ----------
    dsc_file_path : pathlib.Path or str
        Location of the ``.DSC`` file. Plain strings and other path-like
        objects are accepted and converted with ``Path``.

    Returns
    -------
    dict
        Parameter names mapped to their string values, plus the axis aliases
        described above.

    Raises
    ------
    FileNotFoundError
        If ``dsc_file_path`` does not point to an existing file.
    IOError
        If the file exists but cannot be opened or read.
    """
    # Accept str / os.PathLike as well, like BrukerListFiles does.
    dsc_file_path = Path(dsc_file_path)
    if not dsc_file_path.is_file():
        raise FileNotFoundError(f"Cannot find the descriptor file {dsc_file_path}")

    try:
        # latin-1 maps every byte to a character, so decoding cannot fail;
        # only OS-level I/O errors need to be translated here.
        with open(dsc_file_path, "r", encoding="latin-1") as f:
            raw_lines = f.readlines()
    except OSError as e:
        raise IOError(f"Error reading DSC file {dsc_file_path}: {e}") from e

    # Pass 1: merge backslash-continued physical lines into logical lines.
    logical_lines = []
    line_iter = iter(raw_lines)
    for raw_line in line_iter:
        line = raw_line.strip()
        while line.endswith("\\"):
            # At end of file there is nothing to append; the trailing
            # backslash is still removed.
            line = line[:-1] + next(line_iter, "").strip()
        logical_lines.append(line.replace("\\n", "\n"))

    # Pass 2: split every logical line into key and value.
    parameters = {}
    for line in logical_lines:
        parts = line.split(maxsplit=1)
        if not parts:
            continue
        key = parts[0]
        # Everything from the Manipulation History Layer onwards is ignored.
        if key.upper() == "#MHL":
            break
        # Section markers ('#...'), comments and other non-letter tokens.
        if not key[0].isalpha():
            continue
        value = parts[1].strip() if len(parts) > 1 else ""
        # Drop one pair of surrounding single quotes, if present.
        if len(value) >= 2 and value.startswith("'") and value.endswith("'"):
            value = value[1:-1]
        parameters[key] = value

    for axis in ("X", "Y"):
        name_key = f"{axis}NAM"
        unit_key = f"{axis}UNI"
        if parameters.get(name_key):
            parameters[f"{axis}AXIS_NAME"] = parameters[name_key]
            # The unit is optional: indexing it unconditionally would raise
            # KeyError for descriptors that omit XUNI / YUNI.
            if unit_key in parameters:
                parameters[f"{axis}AXIS_UNIT"] = parameters[unit_key]
    return parameters
[docs]
def parse_field_params(parameters: dict) -> dict:
    """Return a copy of ``parameters`` with number-like strings made numeric.

    String values are converted with ``int()`` when that succeeds. Otherwise
    they are converted with ``float()`` if they match ``_NUMBER_RE``. Strings
    that are neither, and all non-string values, are carried over unchanged.
    The input dictionary is not modified.

    Parameters
    ----------
    parameters : dict
        Mapping whose string values should be interpreted.

    Returns
    -------
    dict
        New dictionary with the same keys, in the same order.
    """

    def _coerce(text):
        # Integers are tried first so that "42" becomes 42 rather than 42.0.
        try:
            return int(text)
        except ValueError:
            pass
        # float() is only attempted on strings the regex accepts, so that
        # other text stays a string.
        if not _NUMBER_RE.match(text):
            return text
        try:
            return float(text)
        except ValueError:
            return text

    return {
        name: _coerce(raw) if isinstance(raw, str) else raw
        for name, raw in parameters.items()
    }
[docs]
def get_matrix(
    data_file_path: Path,
    dimensions: List[int],
    number_format_code: str,
    byte_order: str,
    is_complex: Union[bool, np.ndarray],
) -> np.ndarray:
    """Load a binary EPR payload from disk as a NumPy array.

    Parameters
    ----------
    data_file_path : pathlib.Path
        Path to the binary data file (.DTA / .spc).
    dimensions : list of int
        Three-element list ``[nx, ny, nz]``.
    number_format_code : str
        NumPy short dtype code (``'i1'``, ``'i2'``, ``'i4'``, ``'f4'``, ``'f8'``).
    byte_order : str
        ``'ieee-be'`` selects big-endian; any other value (normally
        ``'ieee-le'``) is read as little-endian.
    is_complex : bool or np.ndarray
        Whether the payload is complex. When a list or array is given, the
        data is treated as complex if any element is true.

    Returns
    -------
    np.ndarray
        The values in the order ``dimensions`` reversed, with axes of
        length 1 dropped: ``(nz, ny, nx)``, ``(ny, nx)`` or ``(nx,)``. A
        single point comes back as a one-element 1-D array, and an empty
        array is returned when the product of ``dimensions`` is zero.

    Raises
    ------
    FileNotFoundError
        If ``data_file_path`` is not an existing file.
    ValueError
        If ``number_format_code`` is not a valid dtype code, or the data
        cannot be reshaped to the requested dimensions.
    IOError
        If reading fails or the file holds fewer values than required.
    """
    if not data_file_path.is_file():
        raise FileNotFoundError(f"Data file not found: {data_file_path}")

    # Build the element dtype from endianness prefix + code, e.g. '>f8'.
    endian_prefix = "<"
    if byte_order == "ieee-be":
        endian_prefix = ">"
    try:
        element_dtype = np.dtype(f"{endian_prefix}{number_format_code}")
    except TypeError as e:
        raise ValueError(f"Unsupported number format code: {number_format_code}") from e

    n_points_total = int(np.prod(dimensions))
    if n_points_total == 0:
        return np.array([])

    # Per-channel flags are collapsed into one: complex if any flag is set.
    if isinstance(is_complex, (list, np.ndarray)):
        treat_as_complex = np.any(is_complex)
    else:
        treat_as_complex = is_complex

    # Complex points are stored as two consecutive real values.
    if treat_as_complex:
        n_values_to_read = n_points_total * 2
        read_dtype = element_dtype.base
    else:
        n_values_to_read = n_points_total
        read_dtype = element_dtype

    try:
        raw_values = np.fromfile(
            data_file_path, dtype=read_dtype, count=n_values_to_read
        )
    except Exception as e:
        raise IOError(f"Error reading data file {data_file_path}: {e}") from e

    # Check that the file delivered exactly the number of values asked for.
    n_values_read = raw_values.size
    if n_values_read < n_values_to_read:
        raise IOError(
            f"Could not read expected number of data points from {data_file_path}. "
            f"Expected {n_values_to_read}, got {n_values_read}."
        )
    if n_values_read > n_values_to_read:
        warnings.warn(
            f"Read more data points ({n_values_read})"
            f" than expected ({n_values_to_read}) "
            f"from {data_file_path}. Truncating.",
            stacklevel=2,
        )
        raw_values = raw_values[:n_values_to_read]

    if not treat_as_complex:
        data = raw_values
    else:
        if raw_values.size % 2 != 0:
            raise ValueError("Read odd number of values for complex data.")
        # Even positions hold the real parts, odd positions the imaginary.
        data = raw_values[::2] + 1j * raw_values[1::2]

    # The reshape treats X as the fastest-varying index, so the C-ordered
    # target shape is the reversed dimension list without length-1 axes.
    target_shape = [extent for extent in dimensions[::-1] if extent > 1]
    if not target_shape:
        target_shape = (n_points_total,)

    try:
        data = data.reshape(target_shape).squeeze()
        # Never hand back a 0-dim array; a single point becomes shape (1,).
        if data.ndim == 0:
            data = data.reshape(1)
    except ValueError as e:
        raise ValueError(
            f"Could not reshape data with {data.size} points"
            f" into desired shape {target_shape}."
            f" Original dims: {dimensions}. Error: {e}"
        ) from e
    return data
[docs]
def BrukerListFiles(path, recursive=False):
    """Collect the Bruker EPR data files found in a directory.

    A file is included when its suffix is exactly one of ``.DTA``, ``.dta``,
    ``.SPC`` or ``.spc``. Directories are never returned, even if their name
    carries one of these suffixes.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory to scan.
    recursive : bool, optional
        If True, also descend into subdirectories. Default is False.

    Returns
    -------
    list of pathlib.Path
        Sorted list of matching files.

    Raises
    ------
    NotADirectoryError
        If ``path`` is not an existing directory.

    Examples
    --------
    >>> from epyr import BrukerListFiles
    >>> files = BrukerListFiles("examples/data")
    >>> any(p.name.endswith(".DTA") for p in files)
    True
    >>> all_files = BrukerListFiles("examples/data", recursive=True)
    >>> len(all_files) >= len(files)
    True
    """
    root = Path(path)
    if not root.is_dir():
        raise NotADirectoryError(f"{root} is not a valid directory.")

    wanted_suffixes = {".dta", ".DTA", ".spc", ".SPC"}
    # rglob("*") walks the whole tree; iterdir() only the top level.
    entries = root.rglob("*") if recursive else root.iterdir()
    matches = [
        entry
        for entry in entries
        if entry.suffix in wanted_suffixes and entry.is_file()
    ]
    matches.sort()
    return matches