Source code for ewoksxas.tasks.tests.test_autobk

from dataclasses import dataclass
from typing import TYPE_CHECKING

import larch
import numpy as np
import pytest
from ewoksorange.tests.utils import execute_task
from numpy.typing import NDArray
from silx.resources import resource_filename

from ewoksxas.converters.orange import Converter
from ewoksxas.tasks.autobk import AutoBK

if TYPE_CHECKING:
    from Orange.data import Table


[docs] @dataclass(kw_only=True) class AutoBKStats: """Dataclass for storing X(k) evaluation stats for quantitative comparison.""" name: str mean: float amplitude: float integral: float peaks: int k0: float shape: tuple[int, int]
[docs] def evaluate_autobk_task(inputs: dict, name: str = "") -> AutoBKStats: """Run the task and convert output to AutoBKStats format.""" task_outputs = execute_task(AutoBK, inputs=inputs) table: Table = task_outputs["Data"] x, y = Converter.from_table(table).features mean = np.round(y.mean(), decimals=1) amplitude = np.round(y.max(axis=1).mean() - y.min(axis=1).mean(), decimals=1) integral = np.round(np.trapezoid(y, x, axis=1).mean(), decimals=1) peak_filter = amplitude / 100.0 peaks = count_peaks(y.mean(axis=0), filter_value=peak_filter) autobk_stats = AutoBKStats( name=name, mean=mean, integral=integral, amplitude=amplitude, peaks=peaks, k0=y[0, 0], shape=table.X.shape, # pyright: ignore[reportArgumentType] ) return autobk_stats
[docs] def count_peaks(y: NDArray[np.float64], filter_value: float = 0.0) -> int: """Count the number of extrema which are larger than the filter_value.""" dy = np.sign(np.diff(y)) abs_y = np.abs(y) peaks = 1 if abs_y[0] > filter_value else 0 last_y = 0 for i in range(1, len(dy)): if abs_y[i] < filter_value: continue if last_y == np.sign(y[i]): continue if dy[i - 1] != dy[i]: peaks += 1 last_y = np.sign(y[i]) return peaks
# --------------------------------------------------------------------------- # Layer 1 -- task-wiring unit tests # # These stub out larch.xafs.autobk so they exercise only the task's own logic # (output selection, parameter routing, e0/edge_step sourcing) without fitting # any background. They are fast and independent of the installed larch version: # a failure here means *our* task changed, not larch. # --------------------------------------------------------------------------- STUB_K = np.linspace(0.0, 16.0, 50) STUB_CHI = np.linspace(0.1, 1.0, 50) STUB_CHIE_VALUE = 3.0
[docs] @pytest.fixture def stub_autobk(monkeypatch): """Replace ``larch.xafs.autobk`` with a deterministic stub. The stub records the keyword arguments of every call and writes fixed k/chi/chie arrays onto the target group, mimicking the attributes the task reads back after a real autobk run. It yields the list of recorded calls. """ calls: list[dict] = [] def fake_autobk(data, group=None, e0=None, edge_step=None, **kwargs): calls.append({"e0": e0, "edge_step": edge_step, **kwargs}) group.k = STUB_K group.chi = STUB_CHI group.chie = np.full(group.energy.size, STUB_CHIE_VALUE) group.ek0 = 7112.0 # autobk normally writes this; the task falls back to it monkeypatch.setattr("larch.xafs.autobk", fake_autobk) return calls
[docs] def test_default_output_is_chik(fe_foil_one, stub_autobk): """With no 'output' key the task returns chi * k (the chik default).""" outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {}}) _, y = Converter.from_table(outputs["Data"]).features np.testing.assert_allclose(y[0], STUB_CHI * STUB_K) assert len(stub_autobk) == 1 # one spectrum -> one autobk call
[docs] def test_output_chi_is_unscaled(fe_foil_one, stub_autobk): """output='chi' returns the raw chi from autobk.""" params = {"output": "chi"} outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params}) _, y = Converter.from_table(outputs["Data"]).features np.testing.assert_allclose(y[0], STUB_CHI)
[docs] def test_output_chik2_is_k_squared_weighted(fe_foil_one, stub_autobk): """output='chik2' returns chi * k**2.""" params = {"output": "chik2"} outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params}) _, y = Converter.from_table(outputs["Data"]).features np.testing.assert_allclose(y[0], STUB_CHI * STUB_K * STUB_K)
[docs] def test_output_chie_uses_energy_axis(fe_foil_one, stub_autobk): """output='chie' is reported on the energy grid, not the k grid.""" energy_len = fe_foil_one.X.shape[1] params = {"output": "chie"} outputs = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params}) _, y = Converter.from_table(outputs["Data"]).features assert y.shape[1] == energy_len != STUB_K.size np.testing.assert_allclose(y[0], STUB_CHIE_VALUE)
[docs] def test_e0_parameter_is_forwarded(fe_foil_one, stub_autobk): """An explicit e0 is passed straight through to autobk.""" execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {"e0": 7100.0}}) assert stub_autobk[0]["e0"] == 7100.0
[docs] def test_edge_step_parameter_is_forwarded(fe_foil_one, stub_autobk): """An explicit edge_step is passed straight through to autobk.""" execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {"edge_step": 0.5}}) assert stub_autobk[0]["edge_step"] == 0.5
[docs] def test_e0_from_metadata_used_when_not_overridden(fe_foil_one, stub_autobk): """Without an e0 parameter, the table's e0 (from Normalize) is forwarded.""" table_e0 = Converter.from_table(fe_foil_one).get_meta("e0")[0] execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": {}}) assert stub_autobk[0]["e0"] == pytest.approx(table_e0)
[docs] def test_extra_parameters_pass_through(fe_foil_one, stub_autobk): """Parameters other than output/e0/edge_step reach autobk verbatim.""" params = {"output": "chi", "rbkg": 2, "kmin": 1, "kmax": 12, "kweight": 3} execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params}) call = stub_autobk[0] assert call["rbkg"] == 2 assert call["kmin"] == 1 assert call["kmax"] == 12 assert call["kweight"] == 3
# --------------------------------------------------------------------------- # Layer 2 -- invariant integration tests (real larch, no absolute numbers) # ---------------------------------------------------------------------------
[docs] def test_autobk_output_type(fe_foil_one): """Output types share one fit and differ only by exact in-task transforms. These relationships hold regardless of larch's numeric output. """ tables = {} fits = {} for output_type in ("chi", "chik", "chik2", "chie"): params = {"output": output_type} out = execute_task(AutoBK, inputs={"Data": fe_foil_one, "parameters": params}) tables[output_type] = out["Data"] fits[output_type] = out["Groups"][0].chi # The background fit -- and therefore the raw chi on each group -- is # identical for every output type; only the reported array differs. These # are produced in one run, so an exact element-wise comparison is portable. reference_fit = fits["chi"] for fit in fits.values(): np.testing.assert_array_equal(fit, reference_fit) k, chi = Converter.from_table(tables["chi"]).features _, chik = Converter.from_table(tables["chik"]).features _, chik2 = Converter.from_table(tables["chik2"]).features energy, chie = Converter.from_table(tables["chie"]).features # k-weighting is exact in-task arithmetic, independent of larch. np.testing.assert_array_equal(chik, chi * k) np.testing.assert_array_equal(chik2, chi * k * k) # chi oscillates about zero; k-weighting forces the k=0 sample to zero # while the unweighted chi is non-zero there. assert round(float(chi.mean()), 1) == 0.0 assert np.all(chik[:, 0] == 0.0) assert np.all(chik2[:, 0] == 0.0) assert np.all(chi[:, 0] != 0.0) # chi/chik/chik2 share the k grid; chie is reported on the energy grid. assert chi.shape == chik.shape == chik2.shape assert chie.shape[1] == energy.size == fe_foil_one.X.shape[1] > k.size
[docs] def test_autobk_rbkg(fe_foil_one): """Test autobk task by changing rbkg parameter.""" data = {} rbkg_ls = [1, 3, 5] for rbkg in rbkg_ls: params = {"output": "chi", "rbkg": rbkg} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{rbkg}"] = evaluate_autobk_task(inputs=inputs, name=str(rbkg)) rbkg_1: AutoBKStats = data["1"] rbkg_3: AutoBKStats = data["3"] rbkg_5: AutoBKStats = data["5"] # Mean of chi should be 0. assert rbkg_1.mean == rbkg_3.mean == rbkg_5.mean == 0 # Larger rbkg increases oscillations, reduces amplitude. assert rbkg_1.amplitude > rbkg_3.amplitude > rbkg_5.amplitude assert rbkg_1.peaks < rbkg_3.peaks < rbkg_5.peaks assert rbkg_1.k0 > rbkg_3.k0 > rbkg_5.k0 # Increased oscillations push the integral magnitude toward zero. assert abs(rbkg_1.integral) > abs(rbkg_3.integral) >= abs(rbkg_5.integral)
[docs] def test_autobk_e0(fe_foil_one): """Test autobk task by changing e0 parameter.""" data = {} e0_ls = [7105, 7112, 7132] # pre-edge, white-line, post-edge for e0 in e0_ls: params = {"output": "chi", "e0": e0} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{e0}"] = evaluate_autobk_task(inputs=inputs, name=str(e0)) e0_1: AutoBKStats = data["7105"] e0_2: AutoBKStats = data["7112"] e0_3: AutoBKStats = data["7132"] # Mean of chi should be 0. assert e0_1.mean == e0_2.mean == e0_3.mean == 0.0 # e0 adjusts size of k-array. assert e0_1.shape > e0_2.shape > e0_3.shape # Moving e0 towards post-edge lowers value of chi at k0 (little else changes). assert e0_1.amplitude > e0_2.amplitude > e0_3.amplitude assert e0_1.integral > e0_2.integral > e0_3.integral assert e0_1.k0 > e0_2.k0 > e0_3.k0
[docs] def test_autobk_kmin(fe_foil_one): """Test autobk task by changing kmin parameter.""" data = {} kmin_ls = [0, 0.5, 1] for kmin in kmin_ls: params = {"output": "chi", "kmin": kmin} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{kmin}"] = evaluate_autobk_task(inputs=inputs, name=str(kmin)) kmin_1: AutoBKStats = data["0"] kmin_2: AutoBKStats = data["0.5"] kmin_3: AutoBKStats = data["1"] # Mean of chi should be 0. assert kmin_1.mean == kmin_2.mean == kmin_3.mean == 0 # Raising kmin lifts amplitude and integral to a plateau; k0 keeps rising. assert kmin_3.amplitude == kmin_2.amplitude > kmin_1.amplitude assert kmin_3.k0 > kmin_2.k0 > kmin_1.k0 assert kmin_3.integral == kmin_2.integral > kmin_1.integral # Peaks should be unchanged. assert kmin_1.peaks == kmin_2.peaks == kmin_3.peaks # kmin should not crop any data. assert kmin_1.shape == kmin_2.shape == kmin_3.shape
[docs] def test_autobk_kmax(fe_foil_one): """Test autobk task by changing kmax parameter.""" data = {} kmax_ls = [16, 12, 8] for kmax in kmax_ls: params = {"output": "chi", "kmax": kmax} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{kmax}"] = evaluate_autobk_task(inputs=inputs, name=str(kmax)) kmax_16: AutoBKStats = data["16"] kmax_12: AutoBKStats = data["12"] kmax_8: AutoBKStats = data["8"] # Mean of chi should be 0. assert kmax_16.mean == kmax_12.mean == kmax_8.mean == 0 # Only data points are cut: amplitude and integral are unchanged, k0 differs # slightly but randomly, and fewer high-k oscillations survive at low kmax. assert kmax_16.amplitude == kmax_12.amplitude == kmax_8.amplitude assert kmax_16.k0 != kmax_12.k0 != kmax_8.k0 assert kmax_16.integral == kmax_12.integral == kmax_8.integral assert kmax_16.peaks == kmax_12.peaks > kmax_8.peaks # Data arrays should be smaller with smaller kmax. assert (1, 338) > kmax_16.shape > kmax_12.shape > kmax_8.shape
[docs] def test_autobk_kweight(fe_foil_one): """Test autobk task by changing kweight parameter.""" data = {} kweight_ls = [0, 1, 2, 3] for kweight in kweight_ls: params = {"output": "chi", "kweight": kweight} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{kweight}"] = evaluate_autobk_task(inputs=inputs, name=str(kweight)) kw_0: AutoBKStats = data["0"] kw_1: AutoBKStats = data["1"] kw_2: AutoBKStats = data["2"] kw_3: AutoBKStats = data["3"] # Mean of chi should be 0. assert kw_0.mean == kw_1.mean == kw_2.mean == kw_3.mean == 0.0 # kweight increases chi values. The integral gain shrinks at high kweight, # where the decimals=1 rounding collapses the last step to a plateau. assert kw_0.amplitude < kw_1.amplitude < kw_2.amplitude < kw_3.amplitude assert kw_0.k0 < kw_1.k0 < kw_2.k0 < kw_3.k0 assert kw_0.integral < kw_1.integral < kw_2.integral <= kw_3.integral # Chi values increase without extra oscillations / noise: kweight=0 keeps # the low-k noise that k-weighting suppresses, so it has the most peaks; # from kweight=1 onwards the peak count is stable. assert kw_0.peaks > kw_1.peaks == kw_2.peaks == kw_3.peaks
[docs] def test_autobk_dk(fe_foil_one): """Test autobk task by changing dk parameter.""" data = {} dk_ls = [0, 1] for dk in dk_ls: params = {"output": "chi", "dk": dk} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{dk}"] = evaluate_autobk_task(inputs=inputs, name=str(dk)) dk_0: AutoBKStats = data["0"] dk_1: AutoBKStats = data["1"] # Mean of chi should be 0. assert dk_0.mean == dk_1.mean == 0.0 # dk slightly increases chi values. assert dk_0.amplitude < dk_1.amplitude assert dk_0.k0 < dk_1.k0 # Most values remain unchanged. assert dk_0.integral == dk_1.integral assert dk_0.peaks == dk_1.peaks
[docs] def test_autobk_nclamp(fe_foil_one): """Test autobk task by changing nclamp parameter.""" data = {} nclamp_ls = [0, 1, 2, 3, 4, 5] for nclamp in nclamp_ls: params = {"output": "chi", "nclamp": nclamp} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{nclamp}"] = evaluate_autobk_task(inputs=inputs, name=str(nclamp)) # For these data nclamp is a no-op: every setting yields the same output, # whatever larch's absolute values happen to be. assert all(val.mean == 0.0 for val in data.values()) assert len({val.amplitude for val in data.values()}) == 1 assert len({val.integral for val in data.values()}) == 1 assert len({val.peaks for val in data.values()}) == 1 assert len({round(val.k0, 3) for val in data.values()}) == 1
[docs] def test_autobk_clamp_lo(fe_foil_one): """Test autobk task by changing clamp_lo parameter.""" data = {} clow_ls = [0, 1, 5, 10] for clamp_lo in clow_ls: params = {"output": "chi", "clamp_lo": clamp_lo} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{clamp_lo}"] = evaluate_autobk_task(inputs=inputs, name=str(clamp_lo)) lo_0: AutoBKStats = data["0"] lo_1: AutoBKStats = data["1"] lo_5: AutoBKStats = data["5"] lo_10: AutoBKStats = data["10"] # Mean of chi should be 0. assert lo_0.mean == lo_1.mean == lo_5.mean == lo_10.mean == 0.0 # clamp_lo pushes X(k = 0) to 0. assert lo_0.amplitude == lo_1.amplitude > lo_5.amplitude > lo_10.amplitude assert lo_0.k0 > lo_1.k0 > lo_5.k0 > lo_10.k0 assert lo_0.integral == lo_1.integral > lo_5.integral > lo_10.integral
[docs] def test_autobk_clamp_hi(fe_foil_one): """Test autobk task by changing clamp_hi parameter.""" data = {} chigh_ls = [0, 1, 5, 10] for clamp_hi in chigh_ls: params = {"output": "chi", "clamp_hi": clamp_hi} inputs = {"Data": fe_foil_one, "parameters": params} data[f"{clamp_hi}"] = evaluate_autobk_task(inputs=inputs, name=str(clamp_hi)) # clamp_hi pushes X(kmax) to 0, but for these data X(kmax) ~ 0 already, so # every setting yields the same output regardless of larch's absolute values. assert all(val.mean == 0.0 for val in data.values()) assert len({val.amplitude for val in data.values()}) == 1 assert len({val.integral for val in data.values()}) == 1 assert len({val.peaks for val in data.values()}) == 1 assert len({round(val.k0, 3) for val in data.values()}) == 1
# --------------------------------------------------------------------------- # Layer 3 -- larch characterization test # # This is the single place that pins larch's absolute autobk output. The # invariant tests above only check relationships, so a change in larch's # numbers slips past them; this test catches it. A failure here after a larch # upgrade means larch's algorithm changed (not our task): review the new output # and, if it is correct, update LARCH_REFERENCE below. # --------------------------------------------------------------------------- # Captured on fe_foil_one with output="chi". LARCH_REFERENCE = { "larch_version": "2026.1.2", "amplitude": 0.7, "integral": 0.2, "peaks": 17, "k0": 0.5976, "k_len": 338, }
[docs] def test_autobk_matches_larch_reference(fe_foil_one): """Pin larch's absolute autobk output against a known-good reference. A failure means larch's autobk algorithm changed; review it and, if the new output is correct, update LARCH_REFERENCE (and the snapshot in the next test). """ stats = evaluate_autobk_task( inputs={"Data": fe_foil_one, "parameters": {"output": "chi"}} ) ref = LARCH_REFERENCE installed = getattr(larch, "__version__", "unknown") msg = ( f"larch autobk output changed: reference larch={ref['larch_version']}, " f"installed={installed}. Review the change and update LARCH_REFERENCE " "if it is intended." ) assert stats.mean == 0.0, msg assert stats.amplitude == pytest.approx(ref["amplitude"], abs=0.05), msg assert stats.integral == pytest.approx(ref["integral"], abs=0.05), msg assert stats.peaks == ref["peaks"], msg assert stats.k0 == pytest.approx(ref["k0"], abs=1e-3), msg assert stats.shape[1] == ref["k_len"], msg
[docs] def test_autobk_chi_matches_snapshot(fe_foil_one): """Compare the full chi waveform against a stored larch snapshot. This catches subtle drift that the summary statistics above miss. Regenerate the snapshot with ``python -m ewoksxas.resources.data.regenerate_reference`` once a larch change has been reviewed. """ reference_path = resource_filename( "ewoksxas:data/snapshots/autobk_chi_reference.npz" ) reference = np.load(reference_path) outputs = execute_task( AutoBK, inputs={"Data": fe_foil_one, "parameters": {"output": "chi"}} ) k, chi = Converter.from_table(outputs["Data"]).features installed = getattr(larch, "__version__", "unknown") reference_version = reference["larch_version"] msg = ( f"larch autobk chi waveform changed (reference larch={reference_version}, " f"installed={installed}). Review and regenerate the snapshot if the " "change is intended." ) np.testing.assert_allclose(k, reference["k"], rtol=1e-6, atol=1e-8, err_msg=msg) np.testing.assert_allclose(chi, reference["chi"], rtol=1e-6, atol=1e-8, err_msg=msg)