Skip to content

pyoframe

Pyoframe's public API. Also applies the monkey patch to the DataFrame libraries.

Config

reset_defaults() classmethod

Resets all configuration options to their default values.

Source code in pyoframe/constants.py
@classmethod
def reset_defaults(cls):
    """
    Restore every configuration option to its default value.

    Each entry of ``cls._defaults`` is written back onto the class as an
    attribute of the same name.
    """
    defaults = cls._defaults
    for option in defaults:
        setattr(cls, option, defaults[option])

Constraint(lhs, sense)

Bases: ModelElementWithId

A linear programming constraint.

Parameters:

Name Type Description Default
lhs Expression

Expression The left hand side of the constraint.

required
sense ConstraintSense

Sense The sense of the constraint.

required
Source code in pyoframe/core.py
def __init__(self, lhs: Expression, sense: ConstraintSense):
    """Initialize a constraint.

    The constraint's index (one row per combination of dimension values) is
    derived from the distinct dimension values found in ``lhs``.

    Parameters:
        lhs: Expression
            The left hand side of the constraint.
        sense: ConstraintSense
            The sense of the constraint.
    """
    self.lhs = lhs
    self._model = lhs._model
    self.sense = sense
    # Arguments of a relax() call that had to be postponed (relax() stores
    # them here when the constraint has no model or no name yet).
    self.to_relax: Optional[FuncArgs] = None

    dims = self.lhs.dimensions
    if dims is None:
        # Dimensionless expression: no index columns.
        data = pl.DataFrame()
    else:
        # maintain_order=True keeps rows in order of first appearance; polars'
        # default leaves the order unspecified, which would make the row order
        # of this constraint (and anything derived from it) vary between runs.
        data = self.lhs.data.select(dims).unique(maintain_order=True)

    super().__init__(data)

slack property writable

The slack of the constraint. Will raise an error if the model has not already been solved. The first call to this property will load the slack values from the solver (lazy loading).

relax(cost, max=None)

Relaxes the constraint by adding a variable to the constraint that can be non-zero at a cost.

Parameters:

Name Type Description Default
cost SupportsToExpr

SupportsToExpr The cost of relaxing the constraint. Costs should be positive, as they are automatically negated for maximization problems.

required
max Optional[SupportsToExpr]

SupportsToExpr, default None The maximum value of the relaxation variable.

None

Returns:

Type Description
Constraint

The same constraint

Examples:

>>> import pyoframe as pf
>>> m = pf.Model("max")
>>> homework_due_tomorrow = pl.DataFrame({"project": ["A", "B", "C"], "cost_per_hour_underdelivered": [10, 20, 30], "hours_to_finish": [9, 9, 9], "max_underdelivered": [1, 9, 9]})
>>> m.hours_spent = pf.Variable(homework_due_tomorrow[["project"]], lb=0)
>>> m.must_finish_project = m.hours_spent >= homework_due_tomorrow[["project", "hours_to_finish"]]
>>> m.only_one_day = sum("project", m.hours_spent) <= 24
>>> _ = m.must_finish_project.relax(homework_due_tomorrow[["project", "cost_per_hour_underdelivered"]], max=homework_due_tomorrow[["project", "max_underdelivered"]])
>>> _ = m.solve(log_to_console=False)

Writing ...
>>> m.hours_spent.solution
shape: (3, 2)
┌─────────┬──────────┐
│ project ┆ solution │
│ ---     ┆ ---      │
│ str     ┆ f64      │
╞═════════╪══════════╡
│ A       ┆ 8.0      │
│ B       ┆ 7.0      │
│ C       ┆ 9.0      │
└─────────┴──────────┘
>>> # It can also be done all in one go!
>>> m = pf.Model("max")
>>> homework_due_tomorrow = pl.DataFrame({"project": ["A", "B", "C"], "cost_per_hour_underdelivered": [10, 20, 30], "hours_to_finish": [9, 9, 9], "max_underdelivered": [1, 9, 9]})
>>> m.hours_spent = pf.Variable(homework_due_tomorrow[["project"]], lb=0)
>>> m.must_finish_project = (m.hours_spent >= homework_due_tomorrow[["project", "hours_to_finish"]]).relax(5)
>>> m.only_one_day = (sum("project", m.hours_spent) <= 24).relax(1)
>>> _ = m.solve(log_to_console=False)

Writing ...
>>> m.objective.value
-3.0
>>> m.hours_spent.solution
shape: (3, 2)
┌─────────┬──────────┐
│ project ┆ solution │
│ ---     ┆ ---      │
│ str     ┆ f64      │
╞═════════╪══════════╡
│ A       ┆ 9.0      │
│ B       ┆ 9.0      │
│ C       ┆ 9.0      │
└─────────┴──────────┘
Source code in pyoframe/core.py
def relax(
    self, cost: SupportsToExpr, max: Optional[SupportsToExpr] = None
) -> Constraint:
    """
    Soften the constraint by adding a penalized, non-negative relaxation variable.

    The relaxation variable is subtracted from the left hand side of `<=`
    constraints and added to the left hand side of `>=` constraints, and its
    cost is added to the model's objective.

    Parameters:
        cost: SupportsToExpr
            The cost of relaxing the constraint. Costs should be positive, as they are
            automatically negated for maximization problems.
        max: SupportsToExpr, default None
            The maximum value of the relaxation variable.

    Returns:
        The same constraint

    Raises:
        NotImplementedError: If the constraint is neither a `<=` nor a `>=` constraint.

    Examples:
        >>> import pyoframe as pf
        >>> m = pf.Model("max")
        >>> homework_due_tomorrow = pl.DataFrame({"project": ["A", "B", "C"], "cost_per_hour_underdelivered": [10, 20, 30], "hours_to_finish": [9, 9, 9], "max_underdelivered": [1, 9, 9]})
        >>> m.hours_spent = pf.Variable(homework_due_tomorrow[["project"]], lb=0)
        >>> m.must_finish_project = m.hours_spent >= homework_due_tomorrow[["project", "hours_to_finish"]]
        >>> m.only_one_day = sum("project", m.hours_spent) <= 24
        >>> _ = m.must_finish_project.relax(homework_due_tomorrow[["project", "cost_per_hour_underdelivered"]], max=homework_due_tomorrow[["project", "max_underdelivered"]])
        >>> _ = m.solve(log_to_console=False) # doctest: +ELLIPSIS
        \rWriting ...
        >>> m.hours_spent.solution
        shape: (3, 2)
        ┌─────────┬──────────┐
        │ project ┆ solution │
        │ ---     ┆ ---      │
        │ str     ┆ f64      │
        ╞═════════╪══════════╡
        │ A       ┆ 8.0      │
        │ B       ┆ 7.0      │
        │ C       ┆ 9.0      │
        └─────────┴──────────┘


        >>> # It can also be done all in one go!
        >>> m = pf.Model("max")
        >>> homework_due_tomorrow = pl.DataFrame({"project": ["A", "B", "C"], "cost_per_hour_underdelivered": [10, 20, 30], "hours_to_finish": [9, 9, 9], "max_underdelivered": [1, 9, 9]})
        >>> m.hours_spent = pf.Variable(homework_due_tomorrow[["project"]], lb=0)
        >>> m.must_finish_project = (m.hours_spent >= homework_due_tomorrow[["project", "hours_to_finish"]]).relax(5)
        >>> m.only_one_day = (sum("project", m.hours_spent) <= 24).relax(1)
        >>> _ = m.solve(log_to_console=False) # doctest: +ELLIPSIS
        \rWriting ...
        >>> m.objective.value
        -3.0
        >>> m.hours_spent.solution
        shape: (3, 2)
        ┌─────────┬──────────┐
        │ project ┆ solution │
        │ ---     ┆ ---      │
        │ str     ┆ f64      │
        ╞═════════╪══════════╡
        │ A       ┆ 9.0      │
        │ B       ┆ 9.0      │
        │ C       ┆ 9.0      │
        └─────────┴──────────┘
    """
    model = self._model
    if model is None or self.name is None:
        # The constraint has no model or no name yet, so only record the
        # request; presumably it is replayed once the constraint is attached
        # to a model (that code is not in this method).
        self.to_relax = FuncArgs(args=[cost, max])
        return self

    relaxation_name = f"{self.name}_relaxation"
    assert not hasattr(
        model, relaxation_name
    ), "Conflicting names, relaxation variable already exists on the model."
    relaxation = Variable(self, lb=0, ub=max)

    # The relaxation variable must loosen the constraint, hence the sign
    # depends on the direction of the inequality.
    if self.sense == ConstraintSense.LE:
        self.lhs -= relaxation
    elif self.sense == ConstraintSense.GE:
        self.lhs += relaxation
    else:  # pragma: no cover
        # TODO
        raise NotImplementedError(
            "Relaxation for equalities has not yet been implemented. Submit a pull request!"
        )

    # Register the variable on the model under the derived name.
    setattr(model, relaxation_name, relaxation)

    penalty = relaxation * cost
    if self.dimensions:
        # Collapse the per-index penalties into a single objective term.
        penalty = sum(self.dimensions, penalty)
    if model.sense == ObjSense.MAX:
        # A cost must lower the objective of a maximization problem.
        penalty *= -1

    if model.objective is None:
        model.objective = penalty
    else:
        model.objective += penalty

    return self

Expression(data)

Bases: ModelElement, SupportsMath, SupportPolarsMethodMixin

A linear expression.

>>> import pandas as pd
>>> from pyoframe import Variable, Model
>>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
>>> m = Model("min")
>>> m.Time = Variable(df.index)
>>> m.Size = Variable(df.index)
>>> expr = df["cost"] * m.Time + df["cost"] * m.Size
>>> expr
<Expression size=5 dimensions={'item': 2, 'time': 3} terms=10>
[1,mon]: Time[1,mon] + Size[1,mon]
[1,tue]: 2 Time[1,tue] +2 Size[1,tue]
[1,wed]: 3 Time[1,wed] +3 Size[1,wed]
[2,mon]: 4 Time[2,mon] +4 Size[2,mon]
[2,tue]: 5 Time[2,tue] +5 Size[2,tue]

Source code in pyoframe/core.py
def __init__(self, data: pl.DataFrame):
    """
    Build a linear expression from a table of terms.

    Parameters:
        data: pl.DataFrame
            The terms of the expression. Must contain the variable (VAR_KEY) and
            coefficient (COEF_KEY) columns.

    >>> import pandas as pd
    >>> from pyoframe import Variable, Model
    >>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
    >>> m = Model("min")
    >>> m.Time = Variable(df.index)
    >>> m.Size = Variable(df.index)
    >>> expr = df["cost"] * m.Time + df["cost"] * m.Size
    >>> expr
    <Expression size=5 dimensions={'item': 2, 'time': 3} terms=10>
    [1,mon]: Time[1,mon] + Size[1,mon]
    [1,tue]: 2 Time[1,tue] +2 Size[1,tue]
    [1,wed]: 3 Time[1,wed] +3 Size[1,wed]
    [2,mon]: 4 Time[2,mon] +4 Size[2,mon]
    [2,tue]: 5 Time[2,tue] +5 Size[2,tue]
    """
    # Both bookkeeping columns are mandatory.
    columns = data.columns
    assert VAR_KEY in columns, "Missing variable column."
    assert COEF_KEY in columns, "Missing coefficient column."

    # Optional guard: ignoring the coefficient, no row may appear twice.
    if Config.enable_is_duplicated_expression_safety_check:
        is_repeated = data.drop(COEF_KEY).is_duplicated()
        # In theory this should never happen unless there's a bug in the library
        if is_repeated.any():  # pragma: no cover
            raise ValueError(
                f"Cannot create an expression with duplicate indices:\n{data.filter(is_repeated)}."
            )

    super().__init__(data)

value: pl.DataFrame property

The value of the expression. Only available after the model has been solved.

Examples:

>>> import pyoframe as pf
>>> m = pf.Model("max")
>>> m.X = pf.Variable({"dim1": [1, 2, 3]}, ub=10)
>>> m.expr_1 = 2 * m.X + 1
>>> m.expr_2 = pf.sum(m.expr_1)
>>> m.objective = m.expr_2 - 3
>>> result = m.solve(log_to_console=False)

...
>>> m.expr_1.value
shape: (3, 2)
┌──────┬──────────┐
│ dim1 ┆ solution │
│ ---  ┆ ---      │
│ i64  ┆ f64      │
╞══════╪══════════╡
│ 1    ┆ 21.0     │
│ 2    ┆ 21.0     │
│ 3    ┆ 21.0     │
└──────┴──────────┘
>>> m.expr_2.value
63.0

__add__(other)

Examples:

>>> import pandas as pd
>>> from pyoframe import Variable
>>> add = pd.DataFrame({"dim1": [1,2,3], "add": [10, 20, 30]}).to_expr()
>>> var = Variable(add)
>>> var + add
<Expression size=3 dimensions={'dim1': 3} terms=6>
[1]: x1 +10
[2]: x2 +20
[3]: x3 +30
>>> var + add + 2
<Expression size=3 dimensions={'dim1': 3} terms=6>
[1]: x1 +12
[2]: x2 +22
[3]: x3 +32
>>> var + pd.DataFrame({"dim1": [1,2], "add": [10, 20]})
Traceback (most recent call last):
...
pyoframe.constants.PyoframeError: Failed to add expressions:
<Expression size=3 dimensions={'dim1': 3} terms=3> + <Expression size=2 dimensions={'dim1': 2} terms=2>
Due to error:
Dataframe has unmatched values. If this is intentional, use .drop_unmatched() or .keep_unmatched()
shape: (1, 2)
┌──────┬────────────┐
│ dim1 ┆ dim1_right │
│ ---  ┆ ---        │
│ i64  ┆ i64        │
╞══════╪════════════╡
│ 3    ┆ null       │
└──────┴────────────┘
>>> 5 + 2 * Variable()
<Expression size=1 dimensions={} terms=2>
2 x4 +5
Source code in pyoframe/core.py
def __add__(self, other):
    """
    Add a number or another expression-like object to this expression.

    Numbers (`int` / `float`) are added as a constant; anything else is first
    converted with `.to_expr()`. Strings are rejected with a `ValueError`.

    Examples:
        >>> import pandas as pd
        >>> from pyoframe import Variable
        >>> add = pd.DataFrame({"dim1": [1,2,3], "add": [10, 20, 30]}).to_expr()
        >>> var = Variable(add)
        >>> var + add
        <Expression size=3 dimensions={'dim1': 3} terms=6>
        [1]: x1 +10
        [2]: x2 +20
        [3]: x3 +30
        >>> var + add + 2
        <Expression size=3 dimensions={'dim1': 3} terms=6>
        [1]: x1 +12
        [2]: x2 +22
        [3]: x3 +32
        >>> var + pd.DataFrame({"dim1": [1,2], "add": [10, 20]})
        Traceback (most recent call last):
        ...
        pyoframe.constants.PyoframeError: Failed to add expressions:
        <Expression size=3 dimensions={'dim1': 3} terms=3> + <Expression size=2 dimensions={'dim1': 2} terms=2>
        Due to error:
        Dataframe has unmatched values. If this is intentional, use .drop_unmatched() or .keep_unmatched()
        shape: (1, 2)
        ┌──────┬────────────┐
        │ dim1 ┆ dim1_right │
        │ ---  ┆ ---        │
        │ i64  ┆ i64        │
        ╞══════╪════════════╡
        │ 3    ┆ null       │
        └──────┴────────────┘
        >>> 5 + 2 * Variable()
        <Expression size=1 dimensions={} terms=2>
        2 x4 +5
    """
    # A string operand usually means the builtin sum() was applied to a
    # dimension name by mistake, so point the user to pf.sum().
    if isinstance(other, str):
        raise ValueError(
            "Cannot add a string to an expression. Perhaps you meant to use pf.sum() instead of sum()?"
        )

    if not isinstance(other, (int, float)):
        rhs = other.to_expr()
        self._learn_from_other(rhs)
        return _add_expressions(self, rhs)

    # Plain numbers take the constant-term path.
    return self._add_const(other)

map(mapping_set, drop_shared_dims=True)

Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.

This is particularly useful to go from one type of dimensions to another. For example, to convert data that is indexed by city to data indexed by country (see example).

Parameters:

Name Type Description Default
mapping_set

SetTypes The set to map the expression to. This can be a DataFrame, Index, or another Set.

required
drop_shared_dims

bool, default True If True, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and repeated rows are summed. If False, the shared dimensions are kept in the resulting expression.

True

Returns:

Type Description

Expression A new Expression containing the result of the mapping operation.

Examples:

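>>> import polars as pl
>>> from pyoframe import Variable, Model
>>> pop_data = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "year": [2024, 2024, 2024], "population": [10, 2, 8]}).to_expr()
>>> cities_and_countries = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "country": ["Canada", "Canada", "USA"]})
>>> pop_data.map(cities_and_countries)
<Expression size=2 dimensions={'year': 1, 'country': 2} terms=2>
[2024,Canada]: 12
[2024,USA]: 8

>>> pop_data.map(cities_and_countries, drop_shared_dims=False)
<Expression size=3 dimensions={'city': 3, 'year': 1, 'country': 2} terms=3>
[Toronto,2024,Canada]: 10
[Vancouver,2024,Canada]: 2
[Boston,2024,USA]: 8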

Source code in pyoframe/core.py
def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True):
    """
    Swap the dimensions shared with mapping_set for the other dimensions of mapping_set.

    Typical use: re-index data from one kind of dimension to another, e.g. turning
    data indexed by city into data indexed by country (see example).

    Parameters:
        mapping_set : SetTypes
            The set to map the expression to. This can be a DataFrame, Index, or another Set.
        drop_shared_dims : bool, default True
            If True, the dimensions shared between the expression and the mapping set are
            dropped from the result and repeated rows are summed.
            If False, the shared dimensions are kept in the result.

    Returns:
        Expression
            A new Expression containing the result of the mapping operation.

    Raises:
        ValueError: If the expression or the mapping set has no dimensions, or if they
            have no dimension in common.

    Examples:

    >>> import polars as pl
    >>> from pyoframe import Variable, Model
    >>> pop_data = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "year": [2024, 2024, 2024], "population": [10, 2, 8]}).to_expr()
    >>> cities_and_countries = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "country": ["Canada", "Canada", "USA"]})
    >>> pop_data.map(cities_and_countries)
    <Expression size=2 dimensions={'year': 1, 'country': 2} terms=2>
    [2024,Canada]: 12
    [2024,USA]: 8

    >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
    <Expression size=3 dimensions={'city': 3, 'year': 1, 'country': 2} terms=3>
    [Toronto,2024,Canada]: 10
    [Vancouver,2024,Canada]: 2
    [Boston,2024,USA]: 8
    """
    mapping_set = Set(mapping_set)

    own_dims = self.dimensions
    if own_dims is None:
        raise ValueError("Cannot use .map() on an expression with no dimensions.")

    target_dims = mapping_set.dimensions
    if target_dims is None:
        raise ValueError(
            "Cannot use .map() with a mapping set containing no dimensions."
        )

    # Dimensions present on both sides, in the expression's own order.
    common = [name for name in own_dims if name in target_dims]
    if len(common) == 0:
        raise ValueError(
            f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
        )

    # Multiplying by the set attaches the set's extra dimensions to each term.
    product = self * mapping_set

    if not drop_shared_dims:
        return product
    # Summing over the shared dimensions removes them and merges repeated rows.
    return sum(common, product)

rolling_sum(over, window_size)

Calculates the rolling sum of the Expression over a specified window size for a given dimension.

This method applies a rolling sum operation over the dimension specified by over, using a window defined by window_size.

Parameters:

Name Type Description Default
over

str The name of the dimension (column) over which the rolling sum is calculated. This dimension must exist within the Expression's dimensions.

required
window_size

int The size of the moving window in terms of number of records. The rolling sum is calculated over this many consecutive elements.

required

Returns:

Type Description

Expression A new Expression instance containing the result of the rolling sum operation. This new Expression retains all dimensions (columns) of the original data, with the rolling sum applied over the specified dimension.

Examples:

>>> import polars as pl
>>> from pyoframe import Variable, Model
>>> cost = pl.DataFrame({"item" : [1, 1, 1, 2, 2], "time": [1, 2, 3, 1, 2], "cost": [1, 2, 3, 4, 5]})
>>> m = Model("min")
>>> m.quantity = Variable(cost[["item", "time"]])
>>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
<Expression size=5 dimensions={'item': 2, 'time': 3} terms=8>
[1,1]: quantity[1,1]
[1,2]: quantity[1,1] +2 quantity[1,2]
[1,3]: 2 quantity[1,2] +3 quantity[1,3]
[2,1]: 4 quantity[2,1]
[2,2]: 4 quantity[2,1] +5 quantity[2,2]
Source code in pyoframe/core.py
def rolling_sum(self, over: str, window_size: int):
    """
    Calculates the rolling sum of the Expression over a specified window size for a given dimension.

    This method applies a rolling sum operation over the dimension specified by `over`,
    using a window defined by `window_size`. All other dimensions are used as
    grouping keys, so windows never span two different groups.


    Parameters:
        over : str
            The name of the dimension (column) over which the rolling sum is calculated.
            This dimension must exist within the Expression's dimensions.
        window_size : int
            The size of the moving window in terms of number of records.
            The rolling sum is calculated over this many consecutive elements.

    Returns:
        Expression
            A new Expression instance containing the result of the rolling sum operation.
            This new Expression retains all dimensions (columns) of the original data,
            with the rolling sum applied over the specified dimension.

    Raises:
        ValueError: If the expression has no dimensions.
        AssertionError: If `over` is not one of the expression's dimensions.

    Examples:
        >>> import polars as pl
        >>> from pyoframe import Variable, Model
        >>> cost = pl.DataFrame({"item" : [1, 1, 1, 2, 2], "time": [1, 2, 3, 1, 2], "cost": [1, 2, 3, 4, 5]})
        >>> m = Model("min")
        >>> m.quantity = Variable(cost[["item", "time"]])
        >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
        <Expression size=5 dimensions={'item': 2, 'time': 3} terms=8>
        [1,1]: quantity[1,1]
        [1,2]: quantity[1,1] +2 quantity[1,2]
        [1,3]: 2 quantity[1,2] +3 quantity[1,3]
        [2,1]: 4 quantity[2,1]
        [2,2]: 4 quantity[2,1] +5 quantity[2,2]
    """
    dims = self.dimensions
    if dims is None:
        raise ValueError(
            "Cannot use rolling_sum() with an expression with no dimensions."
        )
    assert over in dims, f"Cannot sum over {over} as it is not in {dims}"

    # `over` is a single name, so compare for equality. `dim not in over` would
    # be a substring test and would wrongly discard e.g. a dimension named "t"
    # when rolling over "time".
    remaining_dims = [dim for dim in dims if dim != over]

    # The "<n>i" period makes the window span `window_size` index units.
    windows = self.data.rolling(
        index_column=over,
        period=f"{window_size}i",
        group_by=remaining_dims,
    )

    # Within each window, overwrite `over` with the window's largest value so
    # that every term of the window carries the same index label.
    return self._new(
        pl.concat([df.with_columns(pl.col(over).max()) for _, df in windows])
    )

sum(over)

Examples:

>>> import pandas as pd
>>> from pyoframe import Variable
>>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
>>> quantity = Variable(df.reset_index()[["item"]].drop_duplicates())
>>> expr = (quantity * df["cost"]).sum("time")
>>> expr.data
shape: (2, 3)
┌──────┬─────────┬───────────────┐
│ item ┆ __coeff ┆ __variable_id │
│ ---  ┆ ---     ┆ ---           │
│ i64  ┆ f64     ┆ u32           │
╞══════╪═════════╪═══════════════╡
│ 1    ┆ 6.0     ┆ 1             │
│ 2    ┆ 9.0     ┆ 2             │
└──────┴─────────┴───────────────┘
Source code in pyoframe/core.py
def sum(self, over: Union[str, Iterable[str]]):
    """
    Sum the expression over one or more of its dimensions.

    The summed dimensions are removed from the result; terms that then share
    the same remaining dimensions and the same variable are added together.

    Parameters:
        over : str | Iterable[str]
            The dimension name, or names, to sum over. Every name must be one of
            the expression's dimensions. Any iterable is accepted, including
            one-shot iterators.

    Returns:
        Expression
            A new Expression without the dimensions in `over`.

    Raises:
        ValueError: If the expression has no dimensions.
        AssertionError: If a name in `over` is not a dimension of the expression.

    Examples:
        >>> import pandas as pd
        >>> from pyoframe import Variable
        >>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
        >>> quantity = Variable(df.reset_index()[["item"]].drop_duplicates())
        >>> expr = (quantity * df["cost"]).sum("time")
        >>> expr.data
        shape: (2, 3)
        ┌──────┬─────────┬───────────────┐
        │ item ┆ __coeff ┆ __variable_id │
        │ ---  ┆ ---     ┆ ---           │
        │ i64  ┆ f64     ┆ u32           │
        ╞══════╪═════════╪═══════════════╡
        │ 1    ┆ 6.0     ┆ 1             │
        │ 2    ┆ 9.0     ┆ 2             │
        └──────┴─────────┴───────────────┘
    """
    # Materialise `over` once: it is traversed several times below, and a
    # one-shot iterator would be exhausted after the first traversal.
    over = [over] if isinstance(over, str) else list(over)

    dims = self.dimensions
    if not dims:
        raise ValueError(
            f"Cannot sum over dimensions {over} since the current expression has no dimensions."
        )
    assert set(over) <= set(dims), f"Cannot sum over {over} as it is not in {dims}"
    remaining_dims = [dim for dim in dims if dim not in over]

    # Group by the surviving dimensions plus the variable column so that
    # coefficients of the same variable are added within each remaining index.
    return self._new(
        self.data.drop(over)
        .group_by(remaining_dims + [VAR_KEY], maintain_order=True)
        .sum()
    )

within(set)

Examples

>>> import pandas as pd
>>> general_expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
>>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
>>> general_expr.within(filter_expr).data
shape: (2, 3)
┌──────┬─────────┬───────────────┐
│ dim1 ┆ __coeff ┆ __variable_id │
│ ---  ┆ ---     ┆ ---           │
│ i64  ┆ f64     ┆ u32           │
╞══════╪═════════╪═══════════════╡
│ 1    ┆ 1.0     ┆ 0             │
│ 3    ┆ 3.0     ┆ 0             │
└──────┴─────────┴───────────────┘

Source code in pyoframe/core.py
def within(self, set: "SetTypes") -> Expression:
    """
    Keep only the terms whose index values also appear in the given set.

    The comparison uses the dimensions that the expression and the set have in
    common; the set's distinct values on those dimensions act as a filter.

    Examples:
    >>> import pandas as pd
    >>> general_expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
    >>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
    >>> general_expr.within(filter_expr).data
    shape: (2, 3)
    ┌──────┬─────────┬───────────────┐
    │ dim1 ┆ __coeff ┆ __variable_id │
    │ ---  ┆ ---     ┆ ---           │
    │ i64  ┆ f64     ┆ u32           │
    ╞══════╪═════════╪═══════════════╡
    │ 1    ┆ 1.0     ┆ 0             │
    │ 3    ┆ 3.0     ┆ 0             │
    └──────┴─────────┴───────────────┘
    """
    set_frame: pl.DataFrame = Set(set).data
    set_dims = _get_dimensions(set_frame)
    assert (
        set_dims is not None
    ), "Cannot use .within() with a set with no dimensions."

    own_dims = self.dimensions
    assert (
        own_dims is not None
    ), "Cannot use .within() with an expression with no dimensions."

    # Dimensions present on both sides, in the expression's own order.
    shared = [name for name in own_dims if name in set_dims]

    # Distinct key combinations of the set; joining on them filters the terms.
    allowed_keys = set_frame.select(shared).unique(maintain_order=True)
    filtered = self.data.join(allowed_keys, on=shared)
    return self._new(filtered)

Model(min_or_max, name=None, **kwargs)

Bases: AttrContainerMixin

Represents a mathematical optimization model. Add variables, constraints, and an objective to the model by setting attributes.

Source code in pyoframe/model.py
def __init__(self, min_or_max: Union[ObjSense, ObjSenseValue], name=None, **kwargs):
    """
    Create an empty optimization model.

    Parameters:
        min_or_max:
            The optimization direction. It is passed to `ObjSense(...)`, so either an
            `ObjSense` member or one of its values (e.g. "min" or "max") is accepted.
        name:
            Optional name of the model, stored as-is.
        **kwargs:
            Forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)
    # Registries of the model's elements; both start out empty.
    self._variables: List[Variable] = []
    self._constraints: List[Constraint] = []
    self.sense = ObjSense(min_or_max)
    # No objective until one is assigned.
    self._objective: Optional[Objective] = None
    # The name-based variable mapper is only built when
    # Config.print_uses_variable_names is set; otherwise var_map is None.
    self.var_map = (
        NamedVariableMapper(Variable) if Config.print_uses_variable_names else None
    )
    self.io_mappers: Optional[IOMappers] = None
    self.name = name
    # Solver-related state is unset at construction; presumably filled in when
    # the model is solved (TODO confirm against the solve() implementation).
    self.solver: Optional[Solver] = None
    self.solver_model: Optional[Any] = None
    self.params = Container()
    self.result: Optional[Result] = None

Set(*data, **named_data)

Bases: ModelElement, SupportsMath, SupportPolarsMethodMixin

A set which can then be used to index variables.

Examples:

>>> import pyoframe as pf
>>> pf.Set(x=range(2), y=range(3))
<Set size=6 dimensions={'x': 2, 'y': 3}>
[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
Source code in pyoframe/core.py
def __init__(self, *data: SetTypes | Iterable[SetTypes], **named_data):
    """
    Build a set from positional sets and/or named dimensions.

    Parameters:
        *data:
            Set-like inputs, handed to `_parse_acceptable_sets` in the given order.
        **named_data:
            Each keyword becomes a one-entry mapping `{name: values}` appended
            after the positional inputs.

    Raises:
        ValueError: If the resulting table is non-empty and contains duplicate rows.
    """
    # Positional inputs first, then one single-key dict per keyword argument.
    data_list = [
        *data,
        *({dim_name: values} for dim_name, values in named_data.items()),
    ]
    df = self._parse_acceptable_sets(*data_list)

    has_duplicates = not df.is_empty() and df.is_duplicated().any()
    if has_duplicates:
        raise ValueError("Duplicate rows found in input data.")

    super().__init__(df)

Variable(*indexing_sets, lb=None, ub=None, vtype=VType.CONTINUOUS, equals=None)

Bases: ModelElementWithId, SupportsMath, SupportPolarsMethodMixin

Represents one or many decision variables in an optimization model.

Parameters:

Name Type Description Default
*indexing_sets SetTypes | Iterable[SetTypes]

SetTypes (typically a DataFrame or Set) If no indexing_sets are provided, a single variable with no dimensions is created. Otherwise, a variable is created for each element in the Cartesian product of the indexing_sets (see Set for details on behaviour).

()
lb float | int | SupportsToExpr | None

float The lower bound for all variables.

None
ub float | int | SupportsToExpr | None

float The upper bound for all variables.

None
vtype VType | VTypeValue

VType | VTypeValue The type of the variable. Can be either a VType enum or a string. Default is VType.CONTINUOUS.

CONTINUOUS
equals Optional[SupportsMath]

SupportsToExpr When specified, a variable is created and a constraint is added to make the variable equal to the provided expression.

None

Examples:

>>> import pandas as pd
>>> from pyoframe import Variable
>>> df = pd.DataFrame({"dim1": [1, 1, 2, 2, 3, 3], "dim2": ["a", "b", "a", "b", "a", "b"]})
>>> Variable(df)
<Variable lb=-inf ub=inf size=6 dimensions={'dim1': 3, 'dim2': 2}>
[1,a]: x1
[1,b]: x2
[2,a]: x3
[2,b]: x4
[3,a]: x5
[3,b]: x6
>>> Variable(df[["dim1"]])
Traceback (most recent call last):
...
ValueError: Duplicate rows found in input data.
>>> Variable(df[["dim1"]].drop_duplicates())
<Variable lb=-inf ub=inf size=3 dimensions={'dim1': 3}>
[1]: x7
[2]: x8
[3]: x9
Source code in pyoframe/core.py
def __init__(
    self,
    *indexing_sets: SetTypes | Iterable[SetTypes],
    lb: float | int | SupportsToExpr | None = None,
    ub: float | int | SupportsToExpr | None = None,
    vtype: VType | VTypeValue = VType.CONTINUOUS,
    equals: Optional[SupportsMath] = None,
):
    """
    Create one decision variable per element of the given indexing sets.

    Parameters:
        *indexing_sets:
            Sets indexing the variable. With none given, a single dimensionless
            variable is created.
        lb:
            Lower bound. A number is stored directly; anything else is turned into
            the constraint `lb <= self`. Defaults to minus infinity.
        ub:
            Upper bound. A number is stored directly; anything else is turned into
            the constraint `self <= ub`. Defaults to plus infinity.
        vtype:
            The variable type, as a `VType` member or one of its values.
        equals:
            Expression-like object the variable is tied to; it is also used as the
            variable's indexing set, so it cannot be combined with `indexing_sets`.
    """
    # Missing bounds mean "unbounded".
    lb = float("-inf") if lb is None else lb
    ub = float("inf") if ub is None else ub

    if equals is not None:
        assert not indexing_sets, "Cannot specify both 'equals' and 'indexing_sets'"
        indexing_sets = (equals,)

    if indexing_sets:
        data = Set(*indexing_sets).data
    else:
        data = pl.DataFrame()
    super().__init__(data)

    self.vtype: VType = VType(vtype)
    self._equals = equals

    # Tightening the bounds is not strictly necessary, but it adds clarity
    if self.vtype == VType.BINARY:
        lb, ub = 0, 1

    # Numeric bounds are kept as plain attributes; expression-like bounds
    # become constraints and leave the numeric bound infinite.
    if isinstance(lb, (float, int)):
        self.lb = lb
        self.lb_constraint = None
    else:
        lower_constraint = lb <= self
        self.lb = float("-inf")
        self.lb_constraint = lower_constraint

    if isinstance(ub, (float, int)):
        self.ub = ub
        self.ub_constraint = None
    else:
        upper_constraint = self <= ub
        self.ub = float("inf")
        self.ub_constraint = upper_constraint

RC property writable

The reduced cost of the variable. Will raise an error if the model has not already been solved. The first call to this property will load the reduced costs from the solver (lazy loading).

next(dim, wrap_around=False)

Creates an expression where the variable at each index is the next variable in the specified dimension.

Parameters:

Name Type Description Default
dim str

The dimension over which to shift the variable.

required
wrap_around bool

If True, the last index in the dimension is connected to the first index.

False

Examples:

>>> import pandas as pd
>>> from pyoframe import Variable, Model
>>> time_dim = pd.DataFrame({"time": ["00:00", "06:00", "12:00", "18:00"]})
>>> space_dim = pd.DataFrame({"city": ["Toronto", "Berlin"]})
>>> m = Model("min")
>>> m.bat_charge = Variable(time_dim, space_dim)
>>> m.bat_flow = Variable(time_dim, space_dim)
>>> # Fails because the dimensions are not the same
>>> m.bat_charge + m.bat_flow == m.bat_charge.next("time")
Traceback (most recent call last):
...
pyoframe.constants.PyoframeError: Failed to add expressions:
<Expression size=8 dimensions={'time': 4, 'city': 2} terms=16> + <Expression size=6 dimensions={'city': 2, 'time': 3} terms=6>
Due to error:
Dataframe has unmatched values. If this is intentional, use .drop_unmatched() or .keep_unmatched()
shape: (2, 4)
┌───────┬─────────┬────────────┬────────────┐
│ time  ┆ city    ┆ time_right ┆ city_right │
│ ---   ┆ ---     ┆ ---        ┆ ---        │
│ str   ┆ str     ┆ str        ┆ str        │
╞═══════╪═════════╪════════════╪════════════╡
│ 18:00 ┆ Toronto ┆ null       ┆ null       │
│ 18:00 ┆ Berlin  ┆ null       ┆ null       │
└───────┴─────────┴────────────┴────────────┘
>>> (m.bat_charge + m.bat_flow).drop_unmatched() == m.bat_charge.next("time")
<Constraint sense='=' size=6 dimensions={'time': 3, 'city': 2} terms=18>
[00:00,Berlin]: bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] = 0
[00:00,Toronto]: bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] - bat_charge[06:00,Toronto] = 0
[06:00,Berlin]: bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] = 0
[06:00,Toronto]: bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] - bat_charge[12:00,Toronto] = 0
[12:00,Berlin]: bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] = 0
[12:00,Toronto]: bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] - bat_charge[18:00,Toronto] = 0
>>> (m.bat_charge + m.bat_flow) == m.bat_charge.next("time", wrap_around=True)
<Constraint sense='=' size=8 dimensions={'time': 4, 'city': 2} terms=24>
[00:00,Berlin]: bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] = 0
[00:00,Toronto]: bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] - bat_charge[06:00,Toronto] = 0
[06:00,Berlin]: bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] = 0
[06:00,Toronto]: bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] - bat_charge[12:00,Toronto] = 0
[12:00,Berlin]: bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] = 0
[12:00,Toronto]: bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] - bat_charge[18:00,Toronto] = 0
[18:00,Berlin]: bat_charge[18:00,Berlin] + bat_flow[18:00,Berlin] - bat_charge[00:00,Berlin] = 0
[18:00,Toronto]: bat_charge[18:00,Toronto] + bat_flow[18:00,Toronto] - bat_charge[00:00,Toronto] = 0
Source code in pyoframe/core.py
def next(self, dim: str, wrap_around: bool = False) -> Expression:
    """
    Build an expression in which each index holds the variable that follows it along `dim`.

    "Following" refers to the sorted order of the distinct values of `dim`.

    Parameters:
        dim:
            The dimension over which to shift the variable.
        wrap_around:
            If True, the last index in the dimension is connected to the first index.

    Returns:
        Expression
            The shifted expression. Without `wrap_around`, the last index of `dim`
            has no successor and is absent from the result (see the first example).

    Examples:
        >>> import pandas as pd
        >>> from pyoframe import Variable, Model
        >>> time_dim = pd.DataFrame({"time": ["00:00", "06:00", "12:00", "18:00"]})
        >>> space_dim = pd.DataFrame({"city": ["Toronto", "Berlin"]})
        >>> m = Model("min")
        >>> m.bat_charge = Variable(time_dim, space_dim)
        >>> m.bat_flow = Variable(time_dim, space_dim)
        >>> # Fails because the dimensions are not the same
        >>> m.bat_charge + m.bat_flow == m.bat_charge.next("time")
        Traceback (most recent call last):
        ...
        pyoframe.constants.PyoframeError: Failed to add expressions:
        <Expression size=8 dimensions={'time': 4, 'city': 2} terms=16> + <Expression size=6 dimensions={'city': 2, 'time': 3} terms=6>
        Due to error:
        Dataframe has unmatched values. If this is intentional, use .drop_unmatched() or .keep_unmatched()
        shape: (2, 4)
        ┌───────┬─────────┬────────────┬────────────┐
        │ time  ┆ city    ┆ time_right ┆ city_right │
        │ ---   ┆ ---     ┆ ---        ┆ ---        │
        │ str   ┆ str     ┆ str        ┆ str        │
        ╞═══════╪═════════╪════════════╪════════════╡
        │ 18:00 ┆ Toronto ┆ null       ┆ null       │
        │ 18:00 ┆ Berlin  ┆ null       ┆ null       │
        └───────┴─────────┴────────────┴────────────┘

        >>> (m.bat_charge + m.bat_flow).drop_unmatched() == m.bat_charge.next("time")
        <Constraint sense='=' size=6 dimensions={'time': 3, 'city': 2} terms=18>
        [00:00,Berlin]: bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] = 0
        [00:00,Toronto]: bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] - bat_charge[06:00,Toronto] = 0
        [06:00,Berlin]: bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] = 0
        [06:00,Toronto]: bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] - bat_charge[12:00,Toronto] = 0
        [12:00,Berlin]: bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] = 0
        [12:00,Toronto]: bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] - bat_charge[18:00,Toronto] = 0

        >>> (m.bat_charge + m.bat_flow) == m.bat_charge.next("time", wrap_around=True)
        <Constraint sense='=' size=8 dimensions={'time': 4, 'city': 2} terms=24>
        [00:00,Berlin]: bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] = 0
        [00:00,Toronto]: bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] - bat_charge[06:00,Toronto] = 0
        [06:00,Berlin]: bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] = 0
        [06:00,Toronto]: bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] - bat_charge[12:00,Toronto] = 0
        [12:00,Berlin]: bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] = 0
        [12:00,Toronto]: bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] - bat_charge[18:00,Toronto] = 0
        [18:00,Berlin]: bat_charge[18:00,Berlin] + bat_flow[18:00,Berlin] - bat_charge[00:00,Berlin] = 0
        [18:00,Toronto]: bat_charge[18:00,Toronto] + bat_flow[18:00,Toronto] - bat_charge[00:00,Toronto] = 0
    """

    # Lookup table: each distinct value of `dim` (sorted) paired with the value
    # that follows it; the last value gets a null successor.
    successors = (
        self.data.select(dim)
        .unique(maintain_order=True)
        .sort(by=dim)
        .with_columns(pl.col(dim).shift(-1).alias("__next"))
    )
    if wrap_around:
        # Close the cycle: the last value's successor is the first value.
        successors = successors.with_columns(
            pl.col("__next").fill_null(pl.first(dim))
        )
    else:
        # NOTE(review): the null created by shift(-1) is in "__next", while this
        # drops nulls in `dim`; possibly "__next" was intended - confirm.
        successors = successors.drop_nulls(dim)

    expr = self.to_expr()
    # Match each term's own index ("__prev") against the successor column, so
    # the term is relabelled with the index that precedes it along `dim`.
    relabelled = expr.data.rename({dim: "__prev"}).join(
        successors, left_on="__prev", right_on="__next", how="inner"
    )

    helper_columns = ["__prev", "__next"]
    if POLARS_VERSION.major < 1:
        relabelled = relabelled.drop(helper_columns)
    else:
        # From polars 1.x on, tolerate helper columns missing from the join output.
        relabelled = relabelled.drop(helper_columns, strict=False)
    return expr._new(relabelled)