"""This module contains math utilities for the irradiapy package."""
# pylint: disable=unbalanced-tuple-unpacking
from typing import Any, Callable
import numpy as np
from numpy import typing as npt
from numpy.lib.recfunctions import structured_to_unstructured as _str2unstr
from scipy.optimize import curve_fit
# region Math
[docs]
def repeated_prime_factors(n: int) -> list[int]:
"""Return the prime factors of n (with repetition).
Parameters
----------
n : int
The number to factorize.
Returns
-------
list[int]
List of prime factors of n, including repetitions.
"""
facs = []
# Factor out 2s
while n % 2 == 0:
facs.append(2)
n //= 2
# Factor odd primes up to sqrt(n)
f = 3
while f * f <= n:
while n % f == 0:
facs.append(f)
n //= f
f += 2
if n > 1:
facs.append(n)
return facs
# endregion
# region Lorentzian
[docs]
def lorentzian(
xs: npt.NDArray[np.float64],
x_peak: float,
linewidth: float,
amplitude: float,
asymmetry: float,
) -> float | npt.NDArray[np.float64]:
"""Evaluate a Lorentzian function.
Parameters
----------
xs : npt.NDArray[np.float64]
Where to evaluate the function.
x_peak : float
Position with maximum value.
linewidth : float
Linewidth.
amplitude : float
Maximum amplitude.
asymmetry : float
Asymmetry.
Returns
-------
float | npt.NDArray[np.float64]
Evaluated Lorentzian function.
References
----------
See https://doi.org/10.1016/j.nimb.2021.05.014
"""
delta_x = xs - x_peak
linewidth_sq = linewidth**2
exp_term = np.exp(asymmetry * delta_x)
alpha = (1.0 + exp_term) ** 2
alpha_quarter = alpha / 4.0
exponent = -alpha_quarter * delta_x**2 / (2.0 * linewidth_sq)
return amplitude * alpha_quarter * np.exp(exponent)
[docs]
def fit_lorentzian(
xs: npt.NDArray[np.float64],
ys: npt.NDArray[np.float64],
p0: npt.NDArray[np.float64] | None = None,
asymmetry: float = 1.0,
) -> tuple[
npt.NDArray[np.float64],
npt.NDArray[np.float64],
Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
]:
"""Fit data to a Lorentzian function.
Parameters
----------
xs : npt.NDArray[np.float64]
X values where the function is evaluated.
ys : npt.NDArray[np.float64]
Y values at the given xs.
p0 : npt.NDArray[np.float64] | None, optional (default=None)
Initial guess of fit parameters. If None, a guess is generated.
asymmetry : float, optional (default=1.0)
Bound for the asymmetry fit parameter. Fit will be done in (-asymmetry, asymmetry).
Returns
-------
popt : npt.NDArray[np.float64]
Optimal values for the parameters.
pcov : npt.NDArray[np.float64]
Covariance of popt.
fit_function : Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]
Function that evaluates the fitted Lorentzian.
"""
if p0 is None:
peak_index = np.argmax(ys)
x_start = xs[0]
x_end = xs[-1]
sigma_guess = 0.5 * (x_end - x_start)
p0 = np.array(
[
xs[peak_index],
sigma_guess,
ys[peak_index],
0.0,
]
)
x_start = xs[0]
x_end = xs[-1]
x_sum = x_start + x_end
popt, pcov = curve_fit(
lorentzian,
xs,
ys,
p0=p0,
bounds=(
[x_start, 0.0, ys.min(), -asymmetry],
[x_end, x_sum, ys.max(), asymmetry],
),
)
errors = np.sqrt(np.diag(pcov)).astype(float)
def fit_function(xs_fit: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
return lorentzian(xs_fit, *popt)
return popt, errors, fit_function
# endregion
# region Gaussian
[docs]
def gaussian(
xs: npt.NDArray[np.float64],
x_peak: float,
linewidth: float,
amplitude: float,
asymmetry: float,
) -> float | npt.NDArray[np.float64]:
"""Evaluate a Gaussian function.
Parameters
----------
xs : npt.NDArray[np.float64]
Where to evaluate the function.
x_peak : float
Position with maximum value.
linewidth : float
Linewidth.
amplitude : float
Maximum amplitude.
asymmetry : float
Asymmetry.
Returns
-------
float | npt.NDArray[np.float64]
Evaluated Gaussian function.
References
----------
See https://doi.org/10.1016/j.nimb.2021.05.014
"""
delta_x = xs - x_peak
linewidth_sq = linewidth**2
exp_term = np.exp(asymmetry * delta_x)
alpha = (1.0 + exp_term) ** 2
alpha_quarter = alpha / 4.0
exponent = -alpha_quarter * delta_x**2 / (2.0 * linewidth_sq)
return amplitude * alpha_quarter * np.exp(exponent)
[docs]
def fit_gaussian(
xs: npt.NDArray[np.float64],
ys: npt.NDArray[np.float64],
p0: npt.NDArray[np.float64] | None = None,
asymmetry: float = 1.0,
) -> tuple[
npt.NDArray[np.float64],
npt.NDArray[np.float64],
Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
]:
"""Fit data to a Gaussian function.
Parameters
----------
xs : npt.NDArray[np.float64]
X values where the function is evaluated.
ys : npt.NDArray[np.float64]
Y values at the given xs.
p0 : npt.NDArray[np.float64] | None, optional (default=None)
Initial guess of fit parameters. If None, a guess is generated.
asymmetry : float, optional (default=1.0)
Bound for the asymmetry fit parameter. Fit will be done in (-asymmetry, asymmetry).
Returns
-------
popt : npt.NDArray[np.float64]
Optimal values for the parameters.
pcov : npt.NDArray[np.float64]
Covariance of popt.
fit_function : Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]
Function that evaluates the fitted Gaussian.
"""
if p0 is None:
peak_index = np.argmax(ys)
x_start = xs[0]
x_end = xs[-1]
sigma_guess = 0.5 * (x_end - x_start)
p0 = np.array(
[
xs[peak_index],
sigma_guess,
ys[peak_index],
0.0,
]
)
x_start = xs[0]
x_end = xs[-1]
x_sum = x_start + x_end
popt, pcov = curve_fit(
gaussian,
xs,
ys,
p0=p0,
bounds=(
[x_start, 0.0, ys.min(), -asymmetry],
[x_end, x_sum, ys.max(), asymmetry],
),
)
errors = np.sqrt(np.diag(pcov)).astype(float)
def fit_function(xs_fit: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
return gaussian(xs_fit, *popt)
return popt, errors, fit_function
# endregion
# region Power law
[docs]
def power_law(
x: npt.NDArray[np.float64], a: float, k: float
) -> npt.NDArray[np.float64]:
"""Evaluate a power law: y = a * x**k
Parameters
----------
x : npt.NDArray[np.float64]
Input values.
a : float
Prefactor.
k : float
Exponent.
Returns
-------
npt.NDArray[np.float64]
Evaluated power law.
"""
return a * x**k
[docs]
def fit_power_law(
xs: npt.NDArray[np.float64],
ys: npt.NDArray[np.float64],
yerrs: npt.NDArray[np.float64] | None = None,
) -> tuple[
npt.NDArray[np.float64],
npt.NDArray[np.float64],
Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
]:
"""Fit a power law to the given histogram data: y = a * x**k.
Fitted in log-log space: log(y) = log(a) + k * log(x)
Parameters
----------
xs : npt.NDArray[np.float64]
x-values where the function is evaluated.
ys : npt.NDArray[np.float64]
y-values at the given xs.
yerrs : npt.NDArray[np.float64] | None, optional (default=None)
y-errors.
Returns
-------
popt : npt.NDArray[np.float64]
Optimal values for the parameters.
pcov : npt.NDArray[np.float64]
Covariance of popt.
fit_function : Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]
Function that evaluates the fitted power law.
"""
if np.any(xs <= 0.0) or np.any(ys <= 0.0):
raise ValueError("xs and ys must be positive for power law fitting.")
if yerrs is not None and np.any(yerrs <= 0.0):
raise ValueError("yerrs must be positive for power law fitting.")
if yerrs is None:
sigma_log = None
absolute_sigma = False
else:
yerrs = np.asarray(yerrs, dtype=float)
sigma_log = yerrs / ys # Uncertainty propagated to log(y).
absolute_sigma = True
popt, pcov = curve_fit(
lambda x, a, b: a + b * x,
np.log(xs),
np.log(ys),
sigma=sigma_log,
absolute_sigma=absolute_sigma,
)
popt = np.array([np.exp(popt[0]), popt[1]])
errors = np.sqrt(np.diag(pcov)).astype(float)
errors = np.array([popt[0] * errors[0], errors[1]])
def fit_function(x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
return power_law(x, *popt)
return popt, errors, fit_function
# endregion
# region Power law
[docs]
def offset_power_law(
x: npt.NDArray[np.float64], a: float, k: float, b: float
) -> npt.NDArray[np.float64]:
"""Evaluate a power law: y = a * x**k + b
Parameters
----------
x : npt.NDArray[np.float64]
Input values.
a : float
Prefactor.
k : float
Exponent.
b : float
Offset.
Returns
-------
npt.NDArray[np.float64]
Evaluated power law with offset.
"""
return a * x**k + b
[docs]
def fit_offset_power_law(
xs: npt.NDArray[np.float64],
ys: npt.NDArray[np.float64],
yerrs: npt.NDArray[np.float64] | None = None,
) -> tuple[
npt.NDArray[np.float64],
npt.NDArray[np.float64],
Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
]:
"""Fit a power law with an offset to the given histogram data: y = a * x**k + b.
Note
----
The presence of the offset b prevents a log–log linearization.
We therefore fit the model directly in linear space via nonlinear least squares.
Parameters
----------
xs : npt.NDArray[np.float64]
X values where the function is evaluated.
ys : npt.NDArray[np.float64]
Y values at the given xs.
yerrs : npt.NDArray[np.float64] | None, optional (default=None)
Y errors (standard deviations) in linear space.
Returns
-------
popt : npt.NDArray[np.float64]
Optimal values for the parameters.
pcov : npt.NDArray[np.float64]
Covariance of popt.
fit_function : Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]
Function that evaluates the fitted power law with offset.
"""
# Power law: y = a * x**k + b
popt, pcov = curve_fit(
offset_power_law,
xs,
ys,
sigma=yerrs,
absolute_sigma=yerrs is not None,
p0=(1.0, 1.0, 0.0),
)
errors = np.sqrt(np.diag(pcov)).astype(float)
def fit_function(x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
return offset_power_law(x, *popt)
return popt, errors, fit_function
# endregion
# region Linear
[docs]
def linear(x: npt.NDArray[np.float64], a: float, b: float) -> npt.NDArray[np.float64]:
"""Evaluate a linear function: y = a * x + b.
Parameters
----------
x : npt.NDArray[np.float64]
Input values.
a : float
Slope of the line.
b : float
Intercept of the line.
Returns
-------
npt.NDArray[np.float64]
Evaluated linear function.
"""
return a * x + b
[docs]
def fit_linear(
xs: npt.NDArray[np.float64],
ys: npt.NDArray[np.float64],
yerrs: npt.NDArray[np.float64] | None = None,
) -> tuple[
npt.NDArray[np.float64],
npt.NDArray[np.float64],
Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
]:
"""Fit a linear function to the given data: y = a * x + b.
Parameters
----------
xs : npt.NDArray[np.float64]
X values where the function is evaluated.
ys : npt.NDArray[np.float64]
Y values at the given xs.
yerrs : npt.NDArray[np.float64] | None, optional (default=None)
Y errors.
Returns
-------
popt : npt.NDArray[np.float64]
Optimal values for the parameters.
pcov : npt.NDArray[np.float64]
Covariance of popt.
fit_function : Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]
Function that evaluates the fitted linear function.
"""
popt, pcov = curve_fit(linear, xs, ys, sigma=yerrs)
errors = np.sqrt(np.diag(pcov)).astype(float)
def fit_function(x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
return linear(x, *popt)
return popt, errors, fit_function
# endregion
# region Atoms quick calculations
[docs]
def apply_boundary_conditions(
data_atoms: dict[str, Any],
x: bool,
y: bool,
z: bool,
) -> dict[str, Any]:
"""Apply boundary conditions to atoms.
Parameters
----------
data_atoms : dict[str, Any]
Atoms data containing atom positions and boundaries.
x : bool
Whether to apply periodic boundary conditions in the x direction.
y : bool
Whether to apply periodic boundary conditions in the y direction.
z : bool
Whether to apply periodic boundary conditions in the z direction.
Returns
-------
dict[str, Any]
Updated atoms data with applied boundary conditions.
"""
xlo, xhi = data_atoms["xlo"], data_atoms["xhi"]
ylo, yhi = data_atoms["ylo"], data_atoms["yhi"]
zlo, zhi = data_atoms["zlo"], data_atoms["zhi"]
dx = xhi - xlo
dy = yhi - ylo
dz = zhi - zlo
atoms = data_atoms["atoms"]
if x:
atoms["x"] = ((atoms["x"] - xlo) % dx) + xlo
data_atoms["boundary"][0] = "pp"
else:
atoms["x"] = np.clip(atoms["x"], xlo, xhi)
data_atoms["boundary"][0] = "ff"
if y:
atoms["y"] = ((atoms["y"] - ylo) % dy) + ylo
data_atoms["boundary"][1] = "pp"
else:
atoms["y"] = np.clip(atoms["y"], ylo, yhi)
data_atoms["boundary"][1] = "ff"
if z:
atoms["z"] = ((atoms["z"] - zlo) % dz) + zlo
data_atoms["boundary"][2] = "pp"
else:
atoms["z"] = np.clip(atoms["z"], zlo, zhi)
data_atoms["boundary"][2] = "ff"
data_atoms["atoms"] = atoms
return data_atoms
[docs]
def recombine_in_radius(data_defects: dict[str, Any], radius: float) -> dict[str, Any]:
"""Recombine defects (interstitials and vacancies) within a given radius.
Takes into account periodic boundary conditions.
Parameters
----------
data_defects : dict[str, Any]
Defects data containing defect positions and boundaries.
radius : float
Radius within which to recombine defects.
Returns
-------
dict[str, Any]
Updated defects data with recombined defects.
"""
defects = data_defects["atoms"]
boundary = data_defects["boundary"]
xlo, xhi = data_defects["xlo"], data_defects["xhi"]
ylo, yhi = data_defects["ylo"], data_defects["yhi"]
zlo, zhi = data_defects["zlo"], data_defects["zhi"]
cond = defects["type"] == 0
vacs = defects[cond]
sias = defects[~cond]
box = np.array([[xlo, xhi], [ylo, yhi], [zlo, zhi]])
box_size = box[:, 1] - box[:, 0]
vac_pos = _str2unstr(vacs[["x", "y", "z"]])
sia_pos = _str2unstr(sias[["x", "y", "z"]])
# Mask to keep track of recombined interstitials
sia_used = np.zeros(len(sias), dtype=bool)
# List to keep indices of vacancies and interstitials to remove
vac_to_remove = []
sia_to_remove = []
radius2 = radius**2
# For each vacancy, find closest interstitial within radius
for i, vpos in enumerate(vac_pos):
# Compute vector distances to all interstitials
delta = sia_pos - vpos
# Apply minimum image convention for periodic boundary conditions
for d in range(3):
if boundary[d] == "pp":
delta[:, d] -= box_size[d] * np.round(delta[:, d] / box_size[d])
dist2 = np.sum(np.square(delta), axis=1)
# Mask out already recombined interstitials
dist2[sia_used] = np.inf
# Find closest interstitial within squared radius
min_idx = np.argmin(dist2)
if dist2[min_idx] <= radius2:
vac_to_remove.append(i)
sia_to_remove.append(min_idx)
sia_used[min_idx] = True
# Remove recombined vacancies and interstitials
vac_mask = np.ones(len(vacs), dtype=bool)
vac_mask[vac_to_remove] = False
sia_mask = np.ones(len(sias), dtype=bool)
sia_mask[sia_to_remove] = False
defects = np.concatenate([vacs[vac_mask], sias[sia_mask]])
data_defects["atoms"] = defects
return data_defects
# endregion