Source code for ewoksxas.converters.orange

"""Utilities for working with Orange Tables for spectroscopic data.

This module provides classes to convert between Orange Tables and
spectroscopic data representations. While using Orange Tables for this
purpose is not ideal, it allows reusing existing Orange components for data
manipulation and analysis.

Data Representation
-------------------
The names used in the module are inherited from machine learning terminology:
- **features**: Variables that describe the data (x-axis: e.g., energy or wavenumber).
- **targets**: The desired predictions, typically not required for spectroscopy.
- **metas**: Additional variables that provide extra information about the data, but
  are not used for prediction (e.g., motor positions).

For spectroscopic data:
- The x-axis values (e.g., energy) are represented as feature names.
- The intensity values are stored in the feature data, the bulk of the table.
- Metas can store experiment-related information like motor positions.

Example Table Structure
-----------------------
    | location | motor_1 | energy_1  | energy_2  | ...       | energy_n  |
    | -------- | ------- | --------- | --------- | --------- | --------- |
    | (meta)   | (meta)  | (feature) | (feature) | (feature) | (feature) |
    | root     | 12.65   | 1.2       | 0.5       | ...       | 0.4       |
    | leaf     | 15.34   | 0.8       | 1.1       | ...       | 1.6       |
    | stem     | 11.07   | 2.0       | 0.3       | ...       | 2.8       |
    | leaf     | 12.13   | 1.5       | 0.7       | ...       | 3.1       |
    | ...      | ...     | ...       | ...       | ...       | ...       |

Variable types
--------------
Each meta or target is materialized as an Orange ``Variable``:
- ``ContinuousVariable`` for numeric data.
- ``DiscreteVariable`` for categorical data (a small set of repeated labels).
- ``StringVariable`` for free-form text (metas only).

When ``var_type`` is not given it is inferred from the data: numeric data
becomes continuous; non-numeric data with at most ``MAX_DISCRETE_VALUES``
unique values becomes discrete (so it can be used directly by Orange widgets
that group by category); otherwise it falls back to a string for metas, and
raises for targets (which cannot be strings).

Usage
-----
Creating a simple table with the builder pattern:
    >>> x = np.linspace(0, 10, 100)
    >>> spectra = np.array([...])  # shape: (n_rows, n_features)
    >>> table = Converter().add_features(x, spectra).to_table()

Adding metadata and targets:
    >>> areas = np.trapezoid(spectra, x)
    >>> table = (
    ...     Converter()
    ...     .add_features(x, spectra)
    ...     .add_meta("Area", areas)
    ...     .add_meta("Location", locations)
    ...     .add_target("Quality", predictions)
    ...     .to_table()
    ... )

Deriving a task output (new spectra, metadata carried over):
    >>> input_converter = Converter.from_table(table)
    >>> x, y = input_converter.features
    >>> table = (
    ...     input_converter.with_features(x, y * 2)
    ...     .add_meta("e0", e0_values)
    ...     .to_table()
    ... )

With custom variable types:
    >>> table = (
    ...     Converter()
    ...     .add_features(x, spectra)
    ...     .add_meta("Location", locations, var_type=VarType.TEXT)
    ...     .add_target("Quality", predictions, var_type=VarType.NUMERIC)
    ...     .to_table()
    ... )

Round-trip conversion:
    >>> table = (
    ...     Converter()
    ...     .add_features(x, spectra)
    ...     .add_meta("Area", areas)
    ...     .to_table()
    ... )
    >>> converter = Converter.from_table(table)
    >>> feature_names, spectra = converter.features
    >>> metas = converter.metas
"""

from __future__ import annotations

from copy import deepcopy
from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from collections.abc import Sequence

import numpy as np
import numpy.typing as npt
from Orange.data import (
    ContinuousVariable,
    DiscreteVariable,
    Domain,
    StringVariable,
    Table,
    Variable,
)
from pandas.util import hash_array



[docs]
class VarType(Enum):
    """Structuring variables as observed within the Orange Canvas.

    The value of each member is the Orange variable class used to
    materialize a column of that type. ``AUTO`` carries no class (``None``).
    """

    # Numeric data.
    NUMERIC = ContinuousVariable
    # Categorical data: a set of repeated labels.
    CATEGORICAL = DiscreteVariable
    # Free-form text.
    TEXT = StringVariable
    # No explicit type requested.
    AUTO = None


[docs]
    @classmethod
    def from_label(cls, label: str) -> VarType:
        """Return the member whose name equals ``label``, ignoring case.

        Args:
            label: Member name in any casing (e.g. ``"Numeric"``).

        Returns:
            The matching member, or ``AUTO`` when no member name matches.
        """
        wanted = label.upper()
        return next((member for member in cls if member.name == wanted), cls.AUTO)



[docs]
    @classmethod
    def labels(cls) -> list[str]:
        """Return the title-cased name of every member, in definition order."""
        return [str.title(member.name) for member in cls]



[docs]
    @classmethod
    def infer(cls, data: npt.NDArray[Any], *, role: Role, name: str) -> VarType:
        """Choose a variable type for ``data`` among those the role permits.

        Integer, unsigned and float dtypes are numeric. Anything else is
        categorical when the role allows it and the data (compared as strings)
        has at most ``MAX_DISCRETE_VALUES`` distinct values; otherwise it is
        text when the role allows text.

        Args:
            data: Column values as a NumPy array.
            role: Table role whose value lists the permitted types.
            name: Column name, used only in the error message.

        Raises:
            ValueError: If no permitted type fits the data.
        """
        if data.dtype.kind in "iuf":
            return cls.NUMERIC

        permitted = role.value
        distinct = np.unique(data.astype(str))
        few_enough = len(distinct) <= MAX_DISCRETE_VALUES
        if cls.CATEGORICAL in permitted and few_enough:
            return cls.CATEGORICAL
        if cls.TEXT in permitted:
            return cls.TEXT

        raise ValueError(f"Cannot infer a type for {name!r} with role {role.name}.")





[docs]
class Role(Enum):
    """Enum for table roles with permitted variable types as values.

    Each value is a tuple of the ``VarType`` members a column of that role
    may use.
    """

    # Feature columns are numeric only.
    FEATURE = (VarType.NUMERIC,)
    # Targets may be numeric or categorical, never text.
    TARGET = (VarType.NUMERIC, VarType.CATEGORICAL)
    # Metas accept every concrete variable type.
    META = (VarType.NUMERIC, VarType.CATEGORICAL, VarType.TEXT)



# Maximum number of unique non-numeric values to auto-detect as categorical.
# The bound is inclusive: ``VarType.infer`` compares with ``<=``.
MAX_DISCRETE_VALUES = 10



[docs]
@dataclass(slots=True, eq=False)
class Features:
    """The spectral feature block: x-axis names and the 2D intensity matrix.

    Attributes:
        names: 1D array of x-axis values (e.g. energy), one per feature column.
        data: 2D intensity matrix with shape (n_rows, n_features).
        attributes: Per-feature attribute dicts, one per entry of ``names``.
            Defaults to an empty list; ``get_attributes`` substitutes empty
            dicts whenever the length does not match ``names``.
    """

    names: npt.NDArray[Any]
    data: npt.NDArray[np.float64]
    attributes: list[dict[str, str]] = field(default_factory=list)


[docs]
    def matches(self, other: npt.NDArray[Any]) -> bool:
        """Return True when ``other`` is equivalent to this x-axis."""
        if self.names.shape != other.shape or self.names.dtype.kind != other.dtype.kind:
            return False
        if self.names.dtype.kind == "f":
            return bool(np.allclose(self.names, other))
        return bool(np.all(self.names == other))



[docs]
    def get_attributes(self) -> list[dict[str, str]]:
        if len(self.names) != len(self.attributes):
            return [{} for _ in self.names]
        return self.attributes



[docs]
    def to_variables(self) -> list[ContinuousVariable]:
        """Build one ``ContinuousVariable`` per feature name.

        String names are used verbatim; any other value is formatted with six
        decimals. Each variable receives its attribute dict and keeps the
        original x-axis value in ``raw_name``.
        """

        def build(raw: Any, attrs: dict[str, str]) -> ContinuousVariable:
            label = raw if isinstance(raw, str) else f"{raw:.6f}"
            variable = ContinuousVariable(label)
            variable.attributes = attrs
            # Store original value for recovery in from_table.
            variable.raw_name = raw
            return variable

        per_feature = self.get_attributes()
        return [
            build(raw, attrs)
            for raw, attrs in zip(self.names, per_feature, strict=True)
        ]




def _data_is_numeric(data: npt.NDArray[Any]) -> bool:
    """Return whether the data can be used as a ContinuousVariable.

    Integer, unsigned and float dtypes qualify directly; any other dtype
    qualifies only if the array converts to float64 without error.
    """
    if data.dtype.kind not in "iuf":
        try:
            np.asarray(data, dtype=np.float64)
        except (ValueError, TypeError):
            return False
    return True


def _broadcast_default(default: Any, *, shape: tuple[int, ...]) -> npt.NDArray[Any]:
    """Turn a default into an array of the requested shape.

    Args:
        default: single default value or sequence of values.
        shape: the shape of the returned array.

    Returns:
        A scalar default repeated to fill ``shape``, or the sequence default
        converted to an array when it already has that shape.

    Raises:
        ValueError: if default is a sequence and its shape is not `shape`.
    """
    if not np.isscalar(default):
        candidate = np.asarray(default)
        if candidate.shape == shape:
            return candidate
        raise ValueError(f"Array default must be a scalar or have the shape {shape}.")
    return np.full(shape, default)



[docs]
@dataclass(slots=True, kw_only=True, eq=False)
class Column:
    """A single named meta/target column together with its Orange variable type.

    For a ``DiscreteVariable`` column ``data`` holds integer category indices
    and ``values`` holds the ordered category labels those indices point into.
    For ``ContinuousVariable`` and ``StringVariable`` columns ``values`` is empty.

    Attributes:
        name: Name of the column.
        data: Underlying NumPy array (category indices for discrete columns).
        var_type: Orange variable class associated with the column.
        values: Ordered categorical labels (discrete columns only).
        attributes: Entries merged into the ``attributes`` dict of the Orange
            variable built by ``to_variable``.
        kwargs: Extra keyword arguments forwarded to the Orange variable.
    """

    name: str
    data: npt.NDArray[Any]
    var_type: VarType
    values: list[str] = field(default_factory=list)
    attributes: dict[str, Any] = field(default_factory=dict)
    kwargs: dict[str, Any] = field(default_factory=dict)


[docs]
    @classmethod
    def from_data(
        cls,
        name: str,
        data: npt.NDArray[Any],
        var_type: VarType,
        *,
        role: Role,
        attributes: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Column:
        """Resolve the variable type, validate it for the role, and encode the data.

        Args:
            name: Name of the column.
            data: Column values; converted with ``np.asarray``.
            var_type: Requested type; ``VarType.AUTO`` infers it from the data.
            role: Table role, which restricts the permitted variable types.
            attributes: Attribute entries stored on the column (for every
                variable type, including categorical ones).
            **kwargs: Extra arguments for the Orange variable constructor. For
                categorical columns ``values=`` gives the category labels.

        Raises:
            ValueError: if var_type and role are incompatible.
            ValueError: if var_type is ContinuousVariable and data is not numeric.
        """
        data = np.asarray(data)

        if var_type is VarType.AUTO:
            var_type = VarType.infer(data, role=role, name=name)

        if var_type not in role.value:
            raise ValueError(
                f"{var_type.value.__name__} is not allowed for {role.name} {name!r}: "
                f"use one of {[var.name for var in role.value]}"
            )

        column_attributes: dict[str, str] = attributes or {}

        if var_type is VarType.CATEGORICAL:
            column = cls._encode_discrete(name, data, kwargs)
            # The encoder does not know about attributes; attach them here so
            # categorical columns keep them like every other column type.
            column.attributes = column_attributes
            return column
        if var_type is VarType.NUMERIC and not _data_is_numeric(data):
            raise ValueError(f"Data for ContinuousVariable {name!r} must be numeric.")

        return cls(
            name=name,
            data=data,
            var_type=var_type,
            attributes=column_attributes,
            kwargs=dict(kwargs),
        )



[docs]
    @classmethod
    def from_variable(
        cls,
        var: Variable,
        data: npt.NDArray[Any],
        *,
        role: Role,
    ) -> Column:
        """Rebuild a column from an Orange Variable and its raw table data.

        The variable type is looked up from the exact class of ``var``. For a
        discrete variable the data is cast to unsigned integer indices and the
        variable's own category values are forwarded.

        Args:
            var: Orange variable describing the column.
            data: Raw column data taken from the table.
            role: Table role of the column.

        Raises:
            ValueError: Propagated from ``from_data`` when the table data is
                inconsistent with the variable (e.g. a missing categorical
                value, or a non-numeric ContinuousVariable).
        """
        var_type = VarType(type(var))
        extra: dict[str, Any] = {}
        if var_type is VarType.CATEGORICAL:
            data = np.asarray(data, dtype=np.uint64)
            extra["values"] = list(var.values)
        return cls.from_data(
            var.name,
            data,
            var_type,
            role=role,
            attributes=var.attributes,
            **extra,
        )


    @classmethod
    def _encode_discrete(
        cls,
        name: str,
        data: npt.NDArray[Any],
        kwargs: dict[str, Any],
    ) -> Column:
        """Create a categorical column whose data is a set of integer indices.

        An optional ``values`` entry is removed from ``kwargs`` and used as the
        category labels (stringified). String/object data is mapped onto those
        labels, or onto labels derived from the data in order of first
        appearance when none were given. Any other dtype is taken to be
        ready-made indices, which makes ``values`` mandatory.

        Args:
            name: Name of the column.
            data: Labels (string/object dtype) or category indices.
            kwargs: Variable constructor arguments; ``values`` is popped from
                this dict and the remainder is stored on the column.

        Raises:
            ValueError: If numeric data is given without ``values=``, if numeric
                indices fall outside ``0..len(values) - 1``, or if string data
                contains a label not present in an explicit ``values=``.
        """
        explicit_values = kwargs.pop("values", None)
        given = None if explicit_values is None else [str(v) for v in explicit_values]

        if data.dtype.kind in "OSU":
            values, indices = cls._encode_labels(data, given, name)
        elif given is None:
            raise ValueError(
                f"DiscreteVariable {name!r} from numeric data requires 'values'."
            )
        else:
            # Already-numeric indices (e.g. Orange stores discrete metas as floats).
            values = given
            indices = np.asarray(data).astype(int)
            out_of_range = indices.size and (
                indices.min() < 0 or indices.max() >= len(values)
            )
            if out_of_range:
                raise ValueError(
                    f"Indices for DiscreteVariable {name!r} must be in "
                    f"0..{len(values) - 1}."
                )

        return cls(
            name=name,
            data=indices,
            var_type=VarType.CATEGORICAL,
            values=values,
            kwargs=kwargs,
        )

    @staticmethod
    def _encode_labels(
        data: npt.NDArray[Any],
        values: list[str] | None,
        name: str,
    ) -> tuple[list[str], npt.NDArray[np.uint64]]:
        """Map string labels to integer indices, deriving categories if not given.

        When ``values`` is None the categories are taken from the data in order of
        first appearance. When ``values`` is given (e.g. round-trip reconstruction)
        it is used verbatim and the data is validated against it.

        Raises:
            ValueError: If ``values`` is given and the data contains a label that
                is not present in it.
        """
        if values is None:
            mapping: dict[str, int] = {}
            indices = np.fromiter(
                (mapping.setdefault(str(v), len(mapping)) for v in data),
                dtype=np.uint64,
                count=len(data),
            )
            return list(mapping), indices

        mapping = {value: index for index, value in enumerate(values)}
        missing = {str(v) for v in np.unique(data)} - set(mapping)
        if missing:
            raise ValueError(
                f"{name!r}: data values not in 'values': {sorted(missing)}"
            )
        indices = np.fromiter(
            (mapping[str(v)] for v in data), dtype=np.int64, count=len(data)
        )
        return values, indices


[docs]
    def to_variable(self) -> Variable:
        """Instantiate the Orange variable class of this column.

        The stored constructor arguments are forwarded; a categorical column
        with category labels also passes them as ``values`` unless the stored
        arguments already provide one. The column attributes are merged into
        the variable's ``attributes``.
        """
        options = {**self.kwargs}
        if self.values and self.var_type is VarType.CATEGORICAL:
            options.setdefault("values", self.values)
        variable_class = self.var_type.value
        variable: Variable = variable_class(self.name, **options)
        variable.attributes.update(self.attributes)
        return variable



[docs]
    def to_object_array(self) -> npt.NDArray[Any]:
        """Return the data as an object array (preserves types when stacked)."""
        return np.asarray(self.data).astype(object)



[docs]
    def get_decoded_data(self) -> npt.NDArray[Any]:
        """Give the data as a caller should see it.

        A categorical column with category labels yields an object array of
        the label for each stored index; every other column yields its data
        as stored.
        """
        raw = np.asarray(self.data)
        if not (self.var_type is VarType.CATEGORICAL and self.values):
            return raw
        lookup = np.asarray(self.values, dtype=object)
        return lookup[raw.astype(int)]



[docs]
    def take_rows(self, indices: Sequence[int]) -> Column:
        """Return an independent copy of the column with only the given rows.

        Args:
            indices: Row positions to keep, in the desired output order.
        """
        return Column(
            name=self.name,
            data=np.asarray(self.data)[list(indices)].copy(),
            var_type=self.var_type,
            values=list(self.values),
            kwargs=deepcopy(self.kwargs),
        )



[docs]
    def is_compatible(self, column: Column) -> bool:
        """Tell whether ``column`` can be concatenated with this column.

        Both columns must have the same name and must agree on being numeric
        or not. Text and categorical columns are interchangeable. The check is
        symmetric: ``a.is_compatible(b) == b.is_compatible(a)``.

        Args:
            column: The other column.
        """
        self_numeric = self.var_type is VarType.NUMERIC
        other_numeric = column.var_type is VarType.NUMERIC
        # A numeric column never mixes with a non-numeric one, whichever side
        # the numeric column is on.
        if self_numeric != other_numeric:
            return False
        return self.name == column.name



[docs]
    def concatenate(self, *others: Column, role: Role) -> Column:
        """Join this column with ``others`` row-wise into a new column.

        Categorical columns contribute their decoded labels. A numeric column
        stays numeric; for any other column the type is inferred again from
        the joined data, so the result may be text or categorical. Attributes
        and constructor arguments of this column are copied to the result.

        Args:
            *others: Columns to append, in order.
            role: Table role of the resulting column.

        Raises:
            ValueError: If one of ``others`` is not compatible with this
                column, or propagated from ``from_data``.
        """
        # get_decoded_data returns the labels of a categorical column and the
        # stored data of every other column.
        data_block = [self.get_decoded_data()]
        for other in others:
            if not self.is_compatible(other):
                raise ValueError(
                    f"Cannot concatenate converters with different column structure "
                    f"(mismatch at {other.name!r})."
                )
            data_block.append(other.get_decoded_data())

        data = np.concatenate(data_block)

        # Inferring a numeric column again could misclassify it when its data
        # is stored with a non-numeric dtype (e.g. object); keep it numeric and
        # re-infer only the TEXT vs CATEGORICAL choice.
        if self.var_type is VarType.NUMERIC:
            var_type = VarType.NUMERIC
        else:
            var_type = VarType.AUTO

        return Column.from_data(
            name=self.name,
            data=data,
            var_type=var_type,
            role=role,
            attributes=deepcopy(self.attributes),
            **deepcopy(self.kwargs),
        )


    @property
    def is_numeric(self) -> bool:
        """Whether the column has `VarType.NUMERIC` as its `self.var_type`.

        The comparison is by identity with the enum member.
        """
        return self.var_type is VarType.NUMERIC

    @property
    def is_categorical(self) -> bool:
        """Whether the column has `VarType.CATEGORICAL` as its `self.var_type`.

        The comparison is by identity with the enum member.
        """
        return self.var_type is VarType.CATEGORICAL

    @property
    def is_text(self) -> bool:
        """Whether the column has `VarType.TEXT` as its `self.var_type`.

        The comparison is by identity with the enum member.
        """
        return self.var_type is VarType.TEXT




[docs]
class Converter:
    """Helper class to convert between spectroscopic data and Orange Tables.

    Builder methods that populate or modify a Converter in place
    (``add_features``, ``add_meta``, ``add_metas_from``, ``add_target``)
    return ``self`` for chaining. Methods that produce a new row set
    (``with_features``, ``take_rows``, ``concatenate``) return a new Converter and
    leave the original untouched.

    For efficiency the builder methods and the read accessors (``features``,
    ``metas``, ``targets``) avoid copying where they can: they store and return
    references to the underlying arrays, so callers must not mutate data they
    pass in or read out. Discrete metas and targets are the write-side
    exception: their data is re-encoded into a fresh integer-index array rather
    than stored by reference. ``add_metas_from`` (and thus ``with_features``)
    shares the Column objects of the source, whereas ``take_rows`` and
    ``concatenate`` each return a fully independent copy. On the read side,
    ``get_meta_values`` builds a fresh array for a discrete meta and returns a
    reference to the stored array otherwise, while ``get_meta_row`` returns a
    dict of decoded per-row scalar values.
    """

    def __init__(self) -> None:
        """Initialize an empty Converter.

        Use add_features(), add_meta(), and add_target() to populate data.
        """
        # Feature block; None until add_features() is called.
        self._features: Features | None = None
        # Meta columns, in insertion order.
        self._metas: list[Column] = []
        # Target columns, in insertion order.
        self._targets: list[Column] = []

    @property
    def features(self) -> tuple[npt.NDArray[Any], npt.NDArray[np.float64]]:
        """Get the feature names and data.

        Returns:
            A tuple containing feature names and feature data. The names hold
            x-axis values that may be floats or strings.

        Raises:
            ValueError: If no features have been set.
        """
        if self._features is None:
            raise ValueError("Converter has no features data.")
        return self._features.names, self._features.data

    @property
    def metas(self) -> list[Column]:
        """Get the list of metadata column definitions.

        The internal list itself is returned, not a copy.
        """
        return self._metas

    @property
    def targets(self) -> list[Column]:
        """Get the list of target column definitions.

        The internal list itself is returned, not a copy.
        """
        return self._targets

    @property
    def n_rows(self) -> int:
        """Return the number of rows in the table."""
        if self._features is not None:
            return len(self._features.data)
        for columns in (self._metas, self._targets):
            if columns:
                return len(columns[0].data)
        return 0


[docs]
    def get_meta_names(self) -> list[str]:
        """Get the names of all metadata columns, in order."""
        return [column.name for column in self._metas]



[docs]
    def get_column(self, name: str) -> Column:
        """Get the meta or target column by name.

        Args:
            name: the header or column title of the desired column.
        """
        columns: list[Column] = [*self.metas, *self.targets]
        try:
            return next(column for column in columns if column.name == name)
        except StopIteration as err:
            raise KeyError(f"Column {name!r} is not in the converter.") from err



[docs]
    def get_meta_values(self, name: str, default: Any = None) -> npt.NDArray[Any]:
        """Get the array of values for a specific meta.

        Discrete metas are decoded back to their category labels. When the meta
        is missing, the default determines the result: a scalar is broadcast to
        one value per row, and an array-like with one value per row is used
        verbatim. A default of None means no fallback: a missing meta raises.

        Args:
            name: Meta name.
            default: Fallback used when the meta is missing.

        Raises:
            KeyError: If the meta is missing and no default was given.
        """
        try:
            return self.get_column(name).get_decoded_data()
        except KeyError:
            if default is not None:
                return _broadcast_default(default, shape=(self.n_rows,))
            raise



[docs]
    def get_meta_row(self, index: int) -> dict[str, Any]:
        """Get a dictionary of meta values for the row at the given index.

        Args:
            index: row index

        Raises:
            IndexError: If ``index`` is out of range for the current rows.
        """
        return {column.name: column.get_decoded_data()[index] for column in self._metas}



[docs]
    def add_features(
        self,
        names: npt.NDArray[Any],
        data: npt.NDArray[np.float64],
        attributes: list[dict[str, str]] | None = None,
    ) -> Converter:
        """Set the feature data (x-axis and intensities).

        Any existing feature block is replaced.

        Args:
            names: 1D array of x-axis values (e.g., energy, wavenumber).
            data: 2D array of spectral intensities with shape (n_rows, n_features).
                If 1D, treated as a single row and reshaped to (1, n_features).
            attributes: subtitles or meta information for column header.

        Returns:
            Self for method chaining.

        Raises:
            ValueError: If the number of names does not match the data columns.
        """
        names = np.atleast_1d(names)
        data = np.atleast_2d(np.asarray(data, dtype=np.float64))

        if data.shape[1] != names.size:
            raise ValueError(
                f"Shape mismatch: names has {names.size} values, "
                f"but data has {data.shape[1]} columns."
            )

        attributes = attributes or []
        self._features = Features(names=names, data=data, attributes=attributes)
        return self



[docs]
    def add_meta(
        self,
        name: str,
        data: npt.NDArray[Any],
        var_type: VarType = VarType.AUTO,
        attributes: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Converter:
        """Add a metadata variable, replacing any existing column of that name.

        Metas can be ContinuousVariable, StringVariable, or DiscreteVariable.
        A meta is identified by its name, so adding one whose name already
        exists replaces that column in place (keeping its position) rather than
        appending a duplicate. When the replaced column keeps its variable type,
        its category values and constructor arguments are reused unless
        overridden.

        Args:
            name: Name of the metadata variable.
            data: Array of values, one for each row.
            var_type: Variable type as a ``VarType`` member. The default
                ``VarType.AUTO`` infers the type from the data.
            attributes: subtitles or meta information for column header.
            **kwargs: Additional arguments for the Variable constructor.

        Returns:
            Self for chaining.

        Raises:
            ValueError: If the data does not have one value per existing row,
                or propagated from ``Column.from_data``.
        """
        # All the work happens in _set_column; this wrapper only fixes the role.
        self._set_column(
            name, data, var_type, role=Role.META, attributes=attributes, **kwargs
        )
        return self



[docs]
    def get_group_id(
        self,
        id_columns: Sequence[str] = ("Filename", "Scan Name"),
        default_by_row: bool = True,
    ) -> npt.NDArray[Any]:
        """Get an array of identifiers based on the content of one or more columns.

        If one column is provided, the identifiers are the values of that column.
        Two or more columns have their values joined and hashed to create identifiers.

        Args:
            id_columns: meta column names to join for determining group ids.
            default_by_row: separate groups for each row (True) or one group with all
                rows (False).
        """
        if not set(id_columns).issubset(self.get_meta_names()):
            if default_by_row:
                return np.arange(self.n_rows, dtype=np.uint64)
            return np.zeros((self.n_rows,), dtype=np.uint64)

        if len(id_columns) == 1:
            return self.get_meta_values(id_columns[0])

        values = [self.get_meta_values(name) for name in sorted(id_columns)]
        joined = np.asarray(values[0], dtype=str)
        for column in values[1:]:
            separated = np.char.add(joined, "\x1f")
            joined = np.char.add(separated, np.asarray(column, dtype=str))
        return np.asarray(hash_array(np.asarray(joined, dtype=object)), dtype=np.uint64)


    def _set_column(
        self,
        name: str,
        data: npt.NDArray[Any],
        var_type: VarType,
        *,
        role: Role,
        attributes: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """Append the column, or replace the same-named one in place.

        When a column is replaced and keeps its variable type, the constructor
        arguments of the replaced column are reused unless overridden by
        ``kwargs``. The category values are reused as well when the caller
        explicitly asks for a categorical column. A replacement that changes
        the variable type starts from the given ``kwargs`` only, because
        arguments of one Orange variable class need not be valid for another.

        Args:
            name: Name of the column.
            data: Array of values, one for each row.
            var_type: Requested type; ``VarType.AUTO`` infers it from the data.
            role: ``Role.META`` stores the column among the metas, any other
                role among the targets.
            attributes: Attribute entries for the column.
            **kwargs: Additional arguments for the Variable constructor.

        Raises:
            ValueError: If the data does not have one value per existing row,
                or propagated from ``Column.from_data`` / ``VarType.infer``.
        """
        self._check_row_count(name, data)
        columns = self._metas if role is Role.META else self._targets
        position = next(
            (i for i, column in enumerate(columns) if column.name == name),
            None,
        )
        if position is None:
            columns.append(
                Column.from_data(
                    name, data, var_type, role=role, attributes=attributes, **kwargs
                )
            )
            return

        existing_column: Column = columns[position]

        # Resolve AUTO up front so "keeps its variable type" can be decided
        # before any inherited argument is merged in.
        resolved_type = var_type
        if resolved_type is VarType.AUTO:
            resolved_type = VarType.infer(np.asarray(data), role=role, name=name)

        if resolved_type is existing_column.var_type:
            kwargs = {**existing_column.kwargs, **kwargs}
            # Category values are only pinned on an explicit categorical
            # request; an inferred categorical derives them from the new data.
            if var_type is VarType.CATEGORICAL:
                kwargs.setdefault("values", list(existing_column.values))

        columns[position] = Column.from_data(
            name, data, resolved_type, role=role, attributes=attributes, **kwargs
        )

    def _check_row_count(self, name: str, data: npt.NDArray[Any]) -> None:
        """Raise if a column would not have one value per existing row."""
        if self._features is None and not self._metas and not self._targets:
            return
        n_rows = self.n_rows
        array = np.asarray(data)
        if array.ndim == 0:
            raise ValueError(
                f"Column {name!r} must be a 1-D array with one value per row, "
                f"but a scalar was given; the converter has {n_rows} rows."
            )
        n_values = array.shape[0]
        if n_values != n_rows:
            raise ValueError(
                f"Column {name!r} has {n_values} values, but the converter "
                f"has {n_rows} rows."
            )


[docs]
    def add_metas_from(
        self,
        source: Converter,
        *,
        overwrite: bool = False,
    ) -> Converter:
        """Carry over meta columns from another converter.

        Each source meta is carried over by reference: source and destination
        share the Column objects and their underlying arrays. Metas whose name
        already exists here are skipped unless ``overwrite`` is True, so
        task-computed metas take precedence over carried-over ones.

        Args:
            source: Converter to carry metas over from.
            overwrite: Replace existing same-named metas instead of skipping.

        Returns:
            Self for chaining.

        Raises:
            ValueError: If the source has metas but a different row count.
        """
        if source.metas and source.n_rows != self.n_rows:
            raise ValueError(
                f"Cannot carry over metas: source has {source.n_rows} rows, "
                f"but this converter has {self.n_rows}."
            )
        existing = {column.name: i for i, column in enumerate(self._metas)}
        for column in source.metas:
            if column.name in existing:
                if overwrite:
                    self._metas[existing[column.name]] = column
                continue
            self._metas.append(column)
        return self



[docs]
    def add_target(
        self,
        name: str,
        data: npt.NDArray[Any],
        var_type: VarType = VarType.AUTO,
        attributes: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Converter:
        """Add a target variable, replacing any existing column of that name.

        Targets can only be ContinuousVariable or DiscreteVariable. Name
        collisions are resolved like in ``add_meta``: the existing column is
        replaced in place, keeping its position and (when the variable type is
        unchanged) its category values and constructor arguments.

        Args:
            name: Name of the target variable.
            data: Array of values, one for each row.
            var_type: Variable type as a ``VarType`` member. The default
                ``VarType.AUTO`` infers the type from the data.
            attributes: subtitles or meta information for column header.
            **kwargs: Additional arguments for the Variable constructor.

        Returns:
            Self for chaining.

        Raises:
            ValueError: If the data does not have one value per existing row,
                or propagated from ``Column.from_data``.
        """
        # All the work happens in _set_column; this wrapper only fixes the role.
        self._set_column(
            name, data, var_type, role=Role.TARGET, attributes=attributes, **kwargs
        )
        return self



[docs]
    def with_features(
        self,
        names: npt.NDArray[Any],
        data: npt.NDArray[np.float64],
        attributes: list[dict[str, str]] | None = None,
    ) -> Converter:
        """Create a Converter holding new features plus the metas of this one.

        This is the usual shape of a task output: spectral data derived from
        the rows of this converter, with the metadata kept. The metas are
        shared by reference (see ``add_metas_from``); metas computed by the
        task can be added or replaced on the result with ``add_meta``.
        Targets of this converter are not carried over.

        Args:
            names: 1D array of x-axis values (e.g., energy, wavenumber).
            data: 2D array of spectral intensities with shape (n_rows, n_features).
            attributes: subtitles or meta information for column header.

        Returns:
            A new Converter with the given features and the metas of this one.
        """
        derived = Converter()
        derived.add_features(names, data, attributes)
        derived.add_metas_from(self)
        return derived



[docs]
    def take_rows(self, indices: Sequence[int]) -> Converter:
        """Return a new Converter containing only the given rows.

        Rows may be reordered or repeated by listing their indices in the
        desired order. The feature x-axis and every column definition (names,
        types, category values) are preserved; only the per-row data is
        subset.

        Args:
            indices: Row positions to keep, in the desired output order.

        Returns:
            A new Converter with the selected rows.

        Raises:
            IndexError: If any index is out of range for the current rows.
        """
        indices = list(indices)
        selected = Converter()
        if self._features is not None:
            selected._features = Features(
                names=self._features.names.copy(),
                data=self._features.data[indices].copy(),
                attributes=deepcopy(self._features.attributes),
            )
        selected._metas = [column.take_rows(indices) for column in self._metas]
        selected._targets = [column.take_rows(indices) for column in self._targets]
        return selected



[docs]
    @classmethod
    def concatenate(cls, converters: Sequence[Converter]) -> Converter:
        """Concatenate several converters row-wise into a new one.

        Each part is an existing Converter contributing its rows. All parts
        must share the same feature x-axis and the same column structure
        (matching meta/target names in the same order).

        Args:
            converters: The parts to assemble, in order.

        Returns:
            A new Converter holding the rows of every part in sequence.

        Raises:
            ValueError: If the sequence is empty or the parts are incompatible.
        """
        converters = list(converters)
        if not converters:
            raise ValueError("concatenate() requires at least one Converter.")

        combined = cls()
        base = converters[0]
        base_has_features = base._features is not None
        if any((c._features is not None) != base_has_features for c in converters):
            raise ValueError(
                "Cannot concatenate converters that mix presence and absence "
                "of features."
            )
        if base._features is not None:
            names = base._features.names
            blocks = []
            for converter in converters:
                if converter._features is None:
                    raise ValueError(
                        "Cannot concatenate converters that mix presence and "
                        "absence of features."
                    )
                if not base._features.matches(converter._features.names):
                    raise ValueError(
                        "Cannot concatenate converters with different feature x-axes."
                    )
                blocks.append(converter._features.data)
            attributes = deepcopy(base._features.attributes)
            combined._features = Features(
                names=names.copy(), data=np.vstack(blocks), attributes=attributes
            )

        combined._metas = cls._concatenate_column_data(
            [c._metas for c in converters], Role.META
        )
        combined._targets = cls._concatenate_column_data(
            [c._targets for c in converters], Role.TARGET
        )
        return combined


    @staticmethod
    def _concatenate_column_data(
        per_converter_columns: list[list[Column]],
        role: Role,
    ) -> list[Column]:
        """Concatenate the row data of position-matched columns across converters."""
        base_columns = per_converter_columns[0]

        if any(len(columns) != len(base_columns) for columns in per_converter_columns):
            raise ValueError(
                "Cannot concatenate converters with different numbers of columns."
            )

        merged_columns: list[Column] = []
        for column_i, base_column in enumerate(base_columns):
            other_columns = (columns[column_i] for columns in per_converter_columns[1:])
            merged_columns.append(base_column.concatenate(*other_columns, role=role))
        return merged_columns


[docs]
    def to_table(self) -> Table:
        """Construct and return the Orange Table.

        The feature variables (the domain attributes) are ContinuousVariables
        named by the x-axis values; the intensity matrix becomes the X block.
        Meta and target variables hold their column data as the metas and Y
        blocks. Targets are typically not used in spectroscopic data.

        Returns:
            A Table whose domain is built from the feature, target and meta
            variables, with X, Y and metas filled from the stored data. When
            no features are set, X is an empty ``(n_rows, 0)`` block.
        """
        features = self._features
        # One explicit ``is not None`` test drives both the domain attributes
        # and the X block, so the two can never disagree. A truthiness test
        # would depend on whatever ``__bool__``/``__len__`` the features
        # object defines.
        if features is not None:
            feature_vars = features.to_variables()
            X = features.data.astype(np.float64, copy=False)
        else:
            feature_vars = []
            X = np.empty((self.n_rows, 0))

        meta_vars, meta_data = self._columns_to_table(self._metas)
        target_vars, target_data = self._columns_to_table(self._targets)

        domain = Domain(feature_vars, target_vars, meta_vars)

        # Test the number of columns, not the size: a zero-row table must
        # still pass its (0, n) blocks so the domain and data stay consistent.
        return Table.from_numpy(
            domain,
            X=X,
            Y=target_data if target_data.shape[1] > 0 else None,
            metas=meta_data if meta_data.shape[1] > 0 else None,
        )


    def _columns_to_table(
        self, columns: list[Column]
    ) -> tuple[list[Variable], npt.NDArray[Any]]:
        """Build the Orange variables and stacked data for a list of columns."""
        if not columns:
            return [], np.empty((self.n_rows, 0))
        variables = [column.to_variable() for column in columns]
        data = np.column_stack([column.to_object_array() for column in columns])
        return variables, data


[docs]
    @classmethod
    def from_table(cls, table: Table) -> Converter:
        """Create a Converter instance from an existing Orange Table.

        Extracts features, targets, and metadata from the table.
        Feature names are recovered from the table domain attributes.

        Args:
            table: The source Orange Table.
        """
        converter = cls()

        feature_names = cls._recover_feature_names(table.domain.attributes)
        if feature_names:
            converter.add_features(np.array(feature_names), table.X)

        if table.domain.class_vars:
            y_data = table.Y
            if y_data.ndim == 1:
                y_data = y_data.reshape(-1, 1)
            for i, var in enumerate(table.domain.class_vars):
                converter._targets.append(
                    Column.from_variable(var, y_data[:, i], role=Role.TARGET)
                )

        if table.domain.metas:
            for i, var in enumerate(table.domain.metas):
                converter._metas.append(
                    Column.from_variable(var, table.metas[:, i], role=Role.META)
                )

        return converter


    @staticmethod
    def _recover_feature_names(attributes: Sequence[Variable]) -> list[float | str]:
        """Recover the original feature names from variables.

        Variables built by this module carry the original value verbatim in
        ``raw_name``; string values (e.g. scan names) must not be re-parsed.
        For variables from other sources the name is parsed as a float when
        possible, since feature names are typically energies.
        """
        names: list[float | str] = []
        for var in attributes:
            raw_name = getattr(var, "raw_name", None)
            if raw_name is not None:
                names.append(raw_name)
                continue
            try:
                names.append(float(var.name))
            except (ValueError, TypeError):
                names.append(var.name)
        return names