from dataclasses import dataclass
from typing import TYPE_CHECKING
import larch
import numpy as np
import pytest
from ewoksorange.tests.utils import execute_task
from numpy.typing import NDArray
from silx.resources import resource_filename
from ewoksxas.converters.orange import Converter
from ewoksxas.tasks.autobk import AutoBK
if TYPE_CHECKING:
from Orange.data import Table
[docs]
@dataclass(kw_only=True)
class AutoBKStats:
"""Dataclass for storing X(k) evaluation stats for quantitative comparison."""
name: str
mean: float
amplitude: float
integral: float
peaks: int
k0: float
shape: tuple[int, int]
[docs]
def evaluate_autobk_task(inputs: dict, name: str = "") -> AutoBKStats:
"""Run the task and convert output to AutoBKStats format."""
task_outputs = execute_task(AutoBK, inputs=inputs)
table: Table = task_outputs["Data"]
x, y = Converter.from_table(table).features
mean = np.round(y.mean(), decimals=1)
amplitude = np.round(y.max(axis=1).mean() - y.min(axis=1).mean(), decimals=1)
integral = np.round(np.trapezoid(y, x, axis=1).mean(), decimals=1)
peak_filter = amplitude / 100.0
peaks = count_peaks(y.mean(axis=0), filter_value=peak_filter)
autobk_stats = AutoBKStats(
name=name,
mean=mean,
integral=integral,
amplitude=amplitude,
peaks=peaks,
k0=y[0, 0],
shape=table.X.shape, # pyright: ignore[reportArgumentType]
)
return autobk_stats
[docs]
def count_peaks(y: NDArray[np.float64], filter_value: float = 0.0) -> int:
"""Count the number of extrema which are larger than the filter_value."""
dy = np.sign(np.diff(y))
abs_y = np.abs(y)
peaks = 1 if abs_y[0] > filter_value else 0
last_y = 0
for i in range(1, len(dy)):
if abs_y[i] < filter_value:
continue
if last_y == np.sign(y[i]):
continue
if dy[i - 1] != dy[i]:
peaks += 1
last_y = np.sign(y[i])
return peaks
# ---------------------------------------------------------------------------
# Layer 1 -- task-wiring unit tests
#
# These stub out larch.xafs.autobk so they exercise only the task's own logic
# (output selection, parameter routing, e0/edge_step sourcing) without fitting
# any background. They are fast and independent of the installed larch version:
# a failure here means *our* task changed, not larch.
# ---------------------------------------------------------------------------
STUB_K = np.linspace(0.0, 16.0, 50)
STUB_CHI = np.linspace(0.1, 1.0, 50)
STUB_CHIE_VALUE = 3.0
[docs]
@pytest.fixture
def stub_autobk(monkeypatch):
"""Replace ``larch.xafs.autobk`` with a deterministic stub.
The stub records the keyword arguments of every call and writes fixed
k/chi/chie arrays onto the target group, mimicking the attributes the task
reads back after a real autobk run. It yields the list of recorded calls.
"""
calls: list[dict] = []
def fake_autobk(data, group=None, e0=None, edge_step=None, **kwargs):
calls.append({"e0": e0, "edge_step": edge_step, **kwargs})
group.k = STUB_K
group.chi = STUB_CHI
group.chie = np.full(group.energy.size, STUB_CHIE_VALUE)
group.ek0 = 7112.0 # autobk normally writes this; the task falls back to it
monkeypatch.setattr("larch.xafs.autobk", fake_autobk)
return calls
[docs]
def test_default_output_is_chik(fe_foil_one, stub_autobk):
"""With no 'output' key the task returns chi * k (the chik default)."""
outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {}})
_, y = Converter.from_table(outputs["Data"]).features
np.testing.assert_allclose(y[0], STUB_CHI * STUB_K)
assert len(stub_autobk) == 1 # one spectrum -> one autobk call
[docs]
def test_output_chi_is_unscaled(fe_foil_one, stub_autobk):
"""output='chi' returns the raw chi from autobk."""
params = {"output": "chi"}
outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params})
_, y = Converter.from_table(outputs["Data"]).features
np.testing.assert_allclose(y[0], STUB_CHI)
[docs]
def test_output_chik2_is_k_squared_weighted(fe_foil_one, stub_autobk):
"""output='chik2' returns chi * k**2."""
params = {"output": "chik2"}
outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params})
_, y = Converter.from_table(outputs["Data"]).features
np.testing.assert_allclose(y[0], STUB_CHI * STUB_K * STUB_K)
[docs]
def test_output_chie_uses_energy_axis(fe_foil_one, stub_autobk):
"""output='chie' is reported on the energy grid, not the k grid."""
energy_len = fe_foil_one.X.shape[1]
params = {"output": "chie"}
outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params})
_, y = Converter.from_table(outputs["Data"]).features
assert y.shape[1] == energy_len != STUB_K.size
np.testing.assert_allclose(y[0], STUB_CHIE_VALUE)
[docs]
def test_e0_parameter_is_forwarded(fe_foil_one, stub_autobk):
"""An explicit e0 is passed straight through to autobk."""
execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {"e0": 7100.0}})
assert stub_autobk[0]["e0"] == 7100.0
[docs]
def test_edge_step_parameter_is_forwarded(fe_foil_one, stub_autobk):
"""An explicit edge_step is passed straight through to autobk."""
execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {"edge_step": 0.5}})
assert stub_autobk[0]["edge_step"] == 0.5
# ---------------------------------------------------------------------------
# Layer 2 -- invariant integration tests (real larch, no absolute numbers)
# ---------------------------------------------------------------------------
[docs]
def test_autobk_output_type(fe_foil_one):
"""Output types share one fit and differ only by exact in-task transforms.
These relationships hold regardless of larch's numeric output.
"""
tables = {}
fits = {}
for output_type in ("chi", "chik", "chik2", "chie"):
params = {"output": output_type}
out = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params})
tables[output_type] = out["Data"]
fits[output_type] = out["Groups"][0].chi
# The background fit -- and therefore the raw chi on each group -- is
# identical for every output type; only the reported array differs. These
# are produced in one run, so an exact element-wise comparison is portable.
reference_fit = fits["chi"]
for fit in fits.values():
np.testing.assert_array_equal(fit, reference_fit)
k, chi = Converter.from_table(tables["chi"]).features
_, chik = Converter.from_table(tables["chik"]).features
_, chik2 = Converter.from_table(tables["chik2"]).features
energy, chie = Converter.from_table(tables["chie"]).features
# k-weighting is exact in-task arithmetic, independent of larch.
np.testing.assert_array_equal(chik, chi * k)
np.testing.assert_array_equal(chik2, chi * k * k)
# chi oscillates about zero; k-weighting forces the k=0 sample to zero
# while the unweighted chi is non-zero there.
assert round(float(chi.mean()), 1) == 0.0
assert np.all(chik[:, 0] == 0.0)
assert np.all(chik2[:, 0] == 0.0)
assert np.all(chi[:, 0] != 0.0)
# chi/chik/chik2 share the k grid; chie is reported on the energy grid.
assert chi.shape == chik.shape == chik2.shape
assert chie.shape[1] == energy.size == fe_foil_one.X.shape[1] > k.size
[docs]
def test_autobk_rbkg(fe_foil_one):
"""Test autobk task by changing rbkg parameter."""
data = {}
rbkg_ls = [1, 3, 5]
for rbkg in rbkg_ls:
params = {"output": "chi", "rbkg": rbkg}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{rbkg}"] = evaluate_autobk_task(inputs=inputs, name=str(rbkg))
rbkg_1: AutoBKStats = data["1"]
rbkg_3: AutoBKStats = data["3"]
rbkg_5: AutoBKStats = data["5"]
# Mean of chi should be 0.
assert rbkg_1.mean == rbkg_3.mean == rbkg_5.mean == 0
# Larger rbkg increases oscillations, reduces amplitude.
assert rbkg_1.amplitude > rbkg_3.amplitude > rbkg_5.amplitude
assert rbkg_1.peaks < rbkg_3.peaks < rbkg_5.peaks
assert rbkg_1.k0 > rbkg_3.k0 > rbkg_5.k0
# Increased oscillations push the integral magnitude toward zero.
assert abs(rbkg_1.integral) > abs(rbkg_3.integral) >= abs(rbkg_5.integral)
[docs]
def test_autobk_e0(fe_foil_one):
"""Test autobk task by changing e0 parameter."""
data = {}
e0_ls = [7105, 7112, 7132] # pre-edge, white-line, post-edge
for e0 in e0_ls:
params = {"output": "chi", "e0": e0}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{e0}"] = evaluate_autobk_task(inputs=inputs, name=str(e0))
e0_1: AutoBKStats = data["7105"]
e0_2: AutoBKStats = data["7112"]
e0_3: AutoBKStats = data["7132"]
# Mean of chi should be 0.
assert e0_1.mean == e0_2.mean == e0_3.mean == 0.0
# e0 adjusts size of k-array.
assert e0_1.shape > e0_2.shape > e0_3.shape
# Moving e0 towards post-edge lowers value of chi at k0 (little else changes).
assert e0_1.amplitude > e0_2.amplitude > e0_3.amplitude
assert e0_1.integral > e0_2.integral > e0_3.integral
assert e0_1.k0 > e0_2.k0 > e0_3.k0
[docs]
def test_autobk_kmin(fe_foil_one):
"""Test autobk task by changing kmin parameter."""
data = {}
kmin_ls = [0, 0.5, 1]
for kmin in kmin_ls:
params = {"output": "chi", "kmin": kmin}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{kmin}"] = evaluate_autobk_task(inputs=inputs, name=str(kmin))
kmin_1: AutoBKStats = data["0"]
kmin_2: AutoBKStats = data["0.5"]
kmin_3: AutoBKStats = data["1"]
# Mean of chi should be 0.
assert kmin_1.mean == kmin_2.mean == kmin_3.mean == 0
# Raising kmin lifts amplitude and integral to a plateau; k0 keeps rising.
assert kmin_3.amplitude == kmin_2.amplitude > kmin_1.amplitude
assert kmin_3.k0 > kmin_2.k0 > kmin_1.k0
assert kmin_3.integral == kmin_2.integral > kmin_1.integral
# Peaks should be unchanged.
assert kmin_1.peaks == kmin_2.peaks == kmin_3.peaks
# kmin should not crop any data.
assert kmin_1.shape == kmin_2.shape == kmin_3.shape
[docs]
def test_autobk_kmax(fe_foil_one):
"""Test autobk task by changing kmax parameter."""
data = {}
kmax_ls = [16, 12, 8]
for kmax in kmax_ls:
params = {"output": "chi", "kmax": kmax}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{kmax}"] = evaluate_autobk_task(inputs=inputs, name=str(kmax))
kmax_16: AutoBKStats = data["16"]
kmax_12: AutoBKStats = data["12"]
kmax_8: AutoBKStats = data["8"]
# Mean of chi should be 0.
assert kmax_16.mean == kmax_12.mean == kmax_8.mean == 0
# Only data points are cut: amplitude and integral are unchanged, k0 differs
# slightly but randomly, and fewer high-k oscillations survive at low kmax.
assert kmax_16.amplitude == kmax_12.amplitude == kmax_8.amplitude
assert kmax_16.k0 != kmax_12.k0 != kmax_8.k0
assert kmax_16.integral == kmax_12.integral == kmax_8.integral
assert kmax_16.peaks == kmax_12.peaks > kmax_8.peaks
# Data arrays should be smaller with smaller kmax.
assert (1, 338) > kmax_16.shape > kmax_12.shape > kmax_8.shape
[docs]
def test_autobk_kweight(fe_foil_one):
"""Test autobk task by changing kweight parameter."""
data = {}
kweight_ls = [0, 1, 2, 3]
for kweight in kweight_ls:
params = {"output": "chi", "kweight": kweight}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{kweight}"] = evaluate_autobk_task(inputs=inputs, name=str(kweight))
kw_0: AutoBKStats = data["0"]
kw_1: AutoBKStats = data["1"]
kw_2: AutoBKStats = data["2"]
kw_3: AutoBKStats = data["3"]
# Mean of chi should be 0.
assert kw_0.mean == kw_1.mean == kw_2.mean == kw_3.mean == 0.0
# kweight increases chi values. The integral gain shrinks at high kweight,
# where the decimals=1 rounding collapses the last step to a plateau.
assert kw_0.amplitude < kw_1.amplitude < kw_2.amplitude < kw_3.amplitude
assert kw_0.k0 < kw_1.k0 < kw_2.k0 < kw_3.k0
assert kw_0.integral < kw_1.integral < kw_2.integral <= kw_3.integral
# Chi values increase without extra oscillations / noise: kweight=0 keeps
# the low-k noise that k-weighting suppresses, so it has the most peaks;
# from kweight=1 onwards the peak count is stable.
assert kw_0.peaks > kw_1.peaks == kw_2.peaks == kw_3.peaks
[docs]
def test_autobk_dk(fe_foil_one):
"""Test autobk task by changing dk parameter."""
data = {}
dk_ls = [0, 1]
for dk in dk_ls:
params = {"output": "chi", "dk": dk}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{dk}"] = evaluate_autobk_task(inputs=inputs, name=str(dk))
dk_0: AutoBKStats = data["0"]
dk_1: AutoBKStats = data["1"]
# Mean of chi should be 0.
assert dk_0.mean == dk_1.mean == 0.0
# dk slightly increases chi values.
assert dk_0.amplitude < dk_1.amplitude
assert dk_0.k0 < dk_1.k0
# Most values remain unchanged.
assert dk_0.integral == dk_1.integral
assert dk_0.peaks == dk_1.peaks
[docs]
def test_autobk_nclamp(fe_foil_one):
"""Test autobk task by changing nclamp parameter."""
data = {}
nclamp_ls = [0, 1, 2, 3, 4, 5]
for nclamp in nclamp_ls:
params = {"output": "chi", "nclamp": nclamp}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{nclamp}"] = evaluate_autobk_task(inputs=inputs, name=str(nclamp))
# For these data nclamp is a no-op: every setting yields the same output,
# whatever larch's absolute values happen to be.
assert all(val.mean == 0.0 for val in data.values())
assert len({val.amplitude for val in data.values()}) == 1
assert len({val.integral for val in data.values()}) == 1
assert len({val.peaks for val in data.values()}) == 1
assert len({round(val.k0, 3) for val in data.values()}) == 1
[docs]
def test_autobk_clamp_lo(fe_foil_one):
"""Test autobk task by changing clamp_lo parameter."""
data = {}
clow_ls = [0, 1, 5, 10]
for clamp_lo in clow_ls:
params = {"output": "chi", "clamp_lo": clamp_lo}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{clamp_lo}"] = evaluate_autobk_task(inputs=inputs, name=str(clamp_lo))
lo_0: AutoBKStats = data["0"]
lo_1: AutoBKStats = data["1"]
lo_5: AutoBKStats = data["5"]
lo_10: AutoBKStats = data["10"]
# Mean of chi should be 0.
assert lo_0.mean == lo_1.mean == lo_5.mean == lo_10.mean == 0.0
# clamp_lo pushes X(k = 0) to 0.
assert lo_0.amplitude == lo_1.amplitude > lo_5.amplitude > lo_10.amplitude
assert lo_0.k0 > lo_1.k0 > lo_5.k0 > lo_10.k0
assert lo_0.integral == lo_1.integral > lo_5.integral > lo_10.integral
[docs]
def test_autobk_clamp_hi(fe_foil_one):
"""Test autobk task by changing clamp_hi parameter."""
data = {}
chigh_ls = [0, 1, 5, 10]
for clamp_hi in chigh_ls:
params = {"output": "chi", "clamp_hi": clamp_hi}
inputs = {"Data": fe_foil_one, "parameters": params}
data[f"{clamp_hi}"] = evaluate_autobk_task(inputs=inputs, name=str(clamp_hi))
# clamp_hi pushes X(kmax) to 0, but for these data X(kmax) ~ 0 already, so
# every setting yields the same output regardless of larch's absolute values.
assert all(val.mean == 0.0 for val in data.values())
assert len({val.amplitude for val in data.values()}) == 1
assert len({val.integral for val in data.values()}) == 1
assert len({val.peaks for val in data.values()}) == 1
assert len({round(val.k0, 3) for val in data.values()}) == 1
# ---------------------------------------------------------------------------
# Layer 3 -- larch characterization test
#
# This is the single place that pins larch's absolute autobk output. The
# invariant tests above only check relationships, so a change in larch's
# numbers slips past them; this test catches it. A failure here after a larch
# upgrade means larch's algorithm changed (not our task): review the new output
# and, if it is correct, update LARCH_REFERENCE below.
# ---------------------------------------------------------------------------
# Captured on fe_foil_one with output="chi".
LARCH_REFERENCE = {
"larch_version": "2026.1.2",
"amplitude": 0.7,
"integral": 0.2,
"peaks": 17,
"k0": 0.5976,
"k_len": 338,
}
[docs]
def test_autobk_matches_larch_reference(fe_foil_one):
"""Pin larch's absolute autobk output against a known-good reference.
A failure means larch's autobk algorithm changed; review it and, if the new
output is correct, update LARCH_REFERENCE (and the snapshot in the next
test).
"""
stats = evaluate_autobk_task(
inputs={"Data": fe_foil_one, "parameters": {"output": "chi"}}
)
ref = LARCH_REFERENCE
installed = getattr(larch, "__version__", "unknown")
msg = (
f"larch autobk output changed: reference larch={ref['larch_version']}, "
f"installed={installed}. Review the change and update LARCH_REFERENCE "
"if it is intended."
)
assert stats.mean == 0.0, msg
assert stats.amplitude == pytest.approx(ref["amplitude"], abs=0.05), msg
assert stats.integral == pytest.approx(ref["integral"], abs=0.05), msg
assert stats.peaks == ref["peaks"], msg
assert stats.k0 == pytest.approx(ref["k0"], abs=1e-3), msg
assert stats.shape[1] == ref["k_len"], msg
[docs]
def test_autobk_chi_matches_snapshot(fe_foil_one):
"""Compare the full chi waveform against a stored larch snapshot.
This catches subtle drift that the summary statistics above miss.
Regenerate the snapshot with
``python -m ewoksxas.resources.data.regenerate_reference`` once a larch
change has been reviewed.
"""
reference_path = resource_filename(
"ewoksxas:data/snapshots/autobk_chi_reference.npz"
)
reference = np.load(reference_path)
outputs = execute_task(
AutoBK, inputs={"Data": fe_foil_one, "parameters": {"output": "chi"}}
)
k, chi = Converter.from_table(outputs["Data"]).features
installed = getattr(larch, "__version__", "unknown")
reference_version = reference["larch_version"]
msg = (
f"larch autobk chi waveform changed (reference larch={reference_version}, "
f"installed={installed}). Review and regenerate the snapshot if the "
"change is intended."
)
np.testing.assert_allclose(k, reference["k"], rtol=1e-6, atol=1e-8, err_msg=msg)
np.testing.assert_allclose(chi, reference["chi"], rtol=1e-6, atol=1e-8, err_msg=msg)