Source code for epyr.sub.utils

"""Shared utilities for the Bruker BES3T and ESP loaders.

Implements parameter-file parsing (.DSC / .par), binary matrix reading
from .DTA / .spc payloads, and directory listing for Bruker file sets.
"""

import re
import warnings
from pathlib import Path
from typing import List, Union

import numpy as np

# Matches a plain decimal literal: an optional sign, then either digits with an
# optional fractional part or a leading-dot fraction such as ".5", then an
# optional exponent. parse_field_params uses it to decide whether a string may
# be handed to float(); spellings such as "inf" or "nan" do not match.
_NUMBER_RE = re.compile(r"^[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$")


def read_par_file(par_file_path: Path) -> dict:
    """Parse a Bruker ESP/WinEPR ``.par`` parameter file into a dictionary.

    Each non-blank line is split on its first run of whitespace into a key
    and a value. Lines whose first token does not start with a letter are
    ignored, a missing value becomes ``""``, and one pair of surrounding
    single quotes is removed from the value. All values stay strings.

    After parsing, generic axis aliases are added for every source key that
    is present and non-empty: ``JEX`` -> ``XAXIS_NAME``, ``JUN`` ->
    ``XAXIS_UNIT``, ``XXUN`` -> ``XAXIS_UNIT`` (overrides ``JUN``),
    ``JEY`` -> ``YAXIS_NAME`` and ``XYUN`` -> ``YAXIS_UNIT``.

    Parameters
    ----------
    par_file_path : pathlib.Path
        Location of the ``.par`` file.

    Returns
    -------
    dict
        Mapping of parameter names to their string values.

    Raises
    ------
    FileNotFoundError
        If ``par_file_path`` is not an existing file.
    IOError
        If reading or parsing the file fails for any other reason.
    """
    if not par_file_path.is_file():
        raise FileNotFoundError(f"Cannot find the parameter file {par_file_path}")

    parameters = {}
    try:
        with open(par_file_path, "r", encoding="latin-1") as handle:
            for raw_line in handle:
                tokens = raw_line.strip().split(maxsplit=1)
                # Blank lines give no tokens; valid keys start with a letter.
                if not tokens or not tokens[0][0].isalpha():
                    continue

                entry = tokens[1].strip() if len(tokens) == 2 else ""
                # Drop one pair of enclosing single quotes, if present.
                quoted = len(entry) >= 2 and entry[0] == "'" and entry[-1] == "'"
                parameters[tokens[0]] = entry[1:-1] if quoted else entry
    except Exception as e:
        raise IOError(f"Error reading PAR file {par_file_path}: {e}") from e

    # Order matters: XXUN is applied after JUN so it wins when both are set.
    axis_aliases = (
        ("JEX", "XAXIS_NAME"),
        ("JUN", "XAXIS_UNIT"),
        ("XXUN", "XAXIS_UNIT"),
        ("JEY", "YAXIS_NAME"),
        ("XYUN", "YAXIS_UNIT"),
    )
    for source_key, alias in axis_aliases:
        if parameters.get(source_key):
            parameters[alias] = parameters[source_key]

    return parameters
def read_dsc_file(dsc_file_path: Path) -> dict:
    """Parse a Bruker BES3T ``.DSC`` descriptor file into a dictionary.

    Physical lines ending in a backslash are joined with the following line
    before parsing, and the two-character sequence ``\\n`` is turned into a
    real newline. Each resulting line is split on its first run of
    whitespace into a key and a value. Parsing stops at the ``#MHL``
    (manipulation history) marker; lines whose first token does not start
    with a letter are skipped. One pair of surrounding single quotes is
    removed from each value. All values stay strings.

    When ``XNAM`` / ``YNAM`` is present and non-empty it is mirrored to
    ``XAXIS_NAME`` / ``YAXIS_NAME``; the matching ``XUNI`` / ``YUNI`` is
    mirrored to ``XAXIS_UNIT`` / ``YAXIS_UNIT`` only if the descriptor
    actually defines it.

    Parameters
    ----------
    dsc_file_path : pathlib.Path
        Location of the ``.DSC`` file.

    Returns
    -------
    dict
        Mapping of parameter names to their string values.

    Raises
    ------
    FileNotFoundError
        If ``dsc_file_path`` is not an existing file.
    IOError
        If the file cannot be read.
    """
    if not dsc_file_path.is_file():
        raise FileNotFoundError(f"Cannot find the descriptor file {dsc_file_path}")

    try:
        with open(dsc_file_path, "r", encoding="latin-1") as f:
            lines = f.readlines()
    except Exception as e:
        raise IOError(f"Error reading DSC file {dsc_file_path}: {e}") from e

    # Join physical lines that end with a continuation backslash.
    processed_lines = []
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        while line.endswith("\\"):
            i += 1
            if i < len(lines):
                line = line[:-1] + lines[i].strip()
            else:
                # Trailing backslash on the very last line: just drop it.
                line = line[:-1]
        processed_lines.append(line.replace("\\n", "\n"))  # Unescape newlines
        i += 1

    parameters = {}
    for line in processed_lines:
        parts = line.split(maxsplit=1)
        if not parts:
            continue

        key = parts[0]
        # Everything after the Manipulation History Layer marker is ignored.
        if key.upper() == "#MHL":
            break
        # Section headers, comments, etc. do not start with a letter.
        if not key[0].isalpha():
            continue

        value = parts[1].strip() if len(parts) > 1 else ""
        if len(value) >= 2 and value.startswith("'") and value.endswith("'"):
            value = value[1:-1]
        parameters[key] = value

    # Generic axis aliases. The unit is optional: indexing it unconditionally
    # would raise KeyError for descriptors that name an axis without a unit.
    for axis in ("X", "Y"):
        if parameters.get(f"{axis}NAM"):
            parameters[f"{axis}AXIS_NAME"] = parameters[f"{axis}NAM"]
            unit = parameters.get(f"{axis}UNI")
            if unit is not None:
                parameters[f"{axis}AXIS_UNIT"] = unit

    return parameters
def parse_field_params(parameters: dict) -> dict:
    """Return a copy of ``parameters`` with number-like strings made numeric.

    For every string value, ``int()`` is tried first. If that fails and the
    string matches ``_NUMBER_RE``, it is converted with ``float()``. Strings
    that match neither, and all non-string values, are copied unchanged.
    The input dictionary is not modified.

    Parameters
    ----------
    parameters : dict
        Mapping whose string values should be converted where possible.

    Returns
    -------
    dict
        New mapping with the same keys and converted values.
    """

    def _coerce(raw):
        # Only strings are candidates for conversion.
        if not isinstance(raw, str):
            return raw
        try:
            return int(raw)
        except ValueError:
            pass
        # Gate float() behind the regex so text stays text.
        if _NUMBER_RE.match(raw):
            return float(raw)
        return raw

    return {name: _coerce(raw) for name, raw in parameters.items()}
def get_matrix(
    data_file_path: Path,
    dimensions: List[int],
    number_format_code: str,
    byte_order: str,
    is_complex: Union[bool, np.ndarray],
) -> np.ndarray:
    """Read binary EPR data from disk into a NumPy array.

    Parameters
    ----------
    data_file_path : pathlib.Path
        Path to the binary data file (.DTA / .spc).
    dimensions : list of int
        Three-element list ``[nx, ny, nz]``. Every entry must be >= 0.
    number_format_code : str
        NumPy short dtype code (``'i1'``, ``'i2'``, ``'i4'``, ``'f4'``,
        ``'f8'``).
    byte_order : str
        ``'ieee-be'`` selects big-endian; any other value is read as
        little-endian.
    is_complex : bool or np.ndarray
        Whether the payload is complex. If a list or array is given, the
        data are treated as complex when any element is true; mixed
        real/complex channels are not handled separately.

    Returns
    -------
    np.ndarray
        The loaded data. The shape is ``dimensions`` reversed with all axes
        of length 1 removed (``(nz, ny, nx)``, ``(ny, nx)`` or ``(nx,)``);
        the result is always at least 1-D. Complex data are built from
        interleaved (real, imaginary) value pairs. If the product of
        ``dimensions`` is 0, an empty array is returned.

    Raises
    ------
    FileNotFoundError
        If ``data_file_path`` is not an existing file.
    ValueError
        If ``number_format_code`` is not a valid dtype code, if any
        dimension is negative, or if the data cannot be reshaped.
    IOError
        If reading fails or the file holds fewer values than requested.
    """
    if not data_file_path.is_file():
        raise FileNotFoundError(f"Data file not found: {data_file_path}")

    # Build the dtype from endianness prefix + code (e.g. '>f8', '<i4').
    dt_char = ">" if byte_order == "ieee-be" else "<"
    try:
        dtype = np.dtype(f"{dt_char}{number_format_code}")
    except TypeError as e:
        raise ValueError(f"Unsupported number format code: {number_format_code}") from e

    # A negative count makes np.fromfile read the whole file, which would
    # silently return data of the wrong length, so reject it up front.
    if any(d < 0 for d in dimensions):
        raise ValueError(f"Dimensions must be non-negative, got {dimensions}.")

    n_points_total = int(np.prod(dimensions))
    if n_points_total == 0:
        return np.array([])

    # Collapse per-channel flags to a single boolean.
    is_complex_flag = (
        np.any(is_complex) if isinstance(is_complex, (list, np.ndarray)) else is_complex
    )

    if is_complex_flag:
        # Each complex point is stored as two consecutive real values.
        n_values_to_read = n_points_total * 2
        actual_dtype = dtype.base
    else:
        n_values_to_read = n_points_total
        actual_dtype = dtype

    try:
        raw_data = np.fromfile(
            data_file_path, dtype=actual_dtype, count=n_values_to_read
        )
    except Exception as e:
        raise IOError(f"Error reading data file {data_file_path}: {e}") from e

    # np.fromfile never returns more than ``count`` items, so only a short
    # read needs to be detected here.
    if raw_data.size < n_values_to_read:
        raise IOError(
            f"Could not read expected number of data points from {data_file_path}. "
            f"Expected {n_values_to_read}, got {raw_data.size}."
        )

    if is_complex_flag:
        # Even-indexed values are real parts, odd-indexed are imaginary.
        data = raw_data[::2] + 1j * raw_data[1::2]
    else:
        data = raw_data

    # NumPy reshapes in C order (last index fastest) and X varies fastest in
    # the file, so the axes are reversed; length-1 axes are dropped.
    shape_numpy_order = [d for d in dimensions[::-1] if d > 1]
    if not shape_numpy_order:
        # Every dimension is 1 (or the list is empty): keep a flat array.
        shape_numpy_order = (n_points_total,)

    try:
        data = data.reshape(shape_numpy_order).squeeze()
        # A single point squeezes to 0-D; promote it back to 1-D.
        if data.ndim == 0:
            data = data.reshape(1)
    except ValueError as e:
        raise ValueError(
            f"Could not reshape data with {data.size} points"
            f" into desired shape {shape_numpy_order}."
            f" Original dims: {dimensions}. Error: {e}"
        ) from e

    return data
def BrukerListFiles(path, recursive=False):
    """Collect the Bruker EPR data files found in a directory.

    A file is selected when its suffix is exactly one of ``.DTA``, ``.dta``,
    ``.SPC`` or ``.spc``; directories carrying such a suffix are ignored.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory to scan.
    recursive : bool, optional
        If True, subdirectories are searched as well. Default is False.

    Returns
    -------
    list of pathlib.Path
        The matching files in sorted order.

    Raises
    ------
    NotADirectoryError
        If ``path`` is not an existing directory.

    Examples
    --------
    >>> from epyr import BrukerListFiles
    >>> files = BrukerListFiles("examples/data")
    >>> any(p.name.endswith(".DTA") for p in files)
    True
    >>> all_files = BrukerListFiles("examples/data", recursive=True)
    >>> len(all_files) >= len(files)
    True
    """
    root = Path(path)
    if not root.is_dir():
        raise NotADirectoryError(f"{root} is not a valid directory.")

    wanted_suffixes = {".dta", ".DTA", ".spc", ".SPC"}
    # rglob walks the whole tree; iterdir only lists direct children.
    candidates = root.rglob("*") if recursive else root.iterdir()

    matches = [
        entry
        for entry in candidates
        if entry.suffix in wanted_suffixes and entry.is_file()
    ]
    matches.sort()
    return matches