Skip to content

Expression

Bases: BaseOperableBlock

Represents a linear or quadratic mathematical expression.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame(
...     {
...         "item": [1, 1, 1, 2, 2],
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "cost": [1, 2, 3, 4, 5],
...     }
... ).set_index(["item", "time"])
>>> m = pf.Model()
>>> m.Time = pf.Variable(df.index)
>>> m.Size = pf.Variable(df.index)
>>> expr = df["cost"] * m.Time + df["cost"] * m.Size
>>> expr
<Expression height=5 terms=10 type=linear>
┌──────┬──────┬──────────────────────────────┐
│ item ┆ time ┆ expression                   │
│ (2)  ┆ (3)  ┆                              │
╞══════╪══════╪══════════════════════════════╡
│ 1    ┆ mon  ┆ Time[1,mon] + Size[1,mon]    │
│ 1    ┆ tue  ┆ 2 Time[1,tue] +2 Size[1,tue] │
│ 1    ┆ wed  ┆ 3 Time[1,wed] +3 Size[1,wed] │
│ 2    ┆ mon  ┆ 4 Time[2,mon] +4 Size[2,mon] │
│ 2    ┆ tue  ┆ 5 Time[2,tue] +5 Size[2,tue] │
└──────┴──────┴──────────────────────────────┘

Methods:

Name Description
constant

Creates a new expression equal to the given constant.

degree

Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

evaluate

Computes the value of the expression using the variables' solutions.

map

Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.

rolling_sum

Calculates the rolling sum of the Expression over a specified window size for a given dimension.

sum

Sums an expression over specified dimensions.

sum_by

Like Expression.sum, but the sum is taken over all dimensions except those specified in by (just like a group_by().sum() operation).

to_expr

Returns the expression itself.

to_str

Converts the expression to a human-readable string, or several arranged in a table.

within

Filters this expression to only include the dimensions within the provided set.

Attributes:

Name Type Description
constant_terms DataFrame

Returns all the constant terms in the expression.

is_quadratic bool

Returns True if the expression is quadratic, False otherwise.

terms int

The number of terms across all subexpressions.

variable_terms DataFrame

Returns all the non-constant terms in the expression.

Source code in pyoframe/_core.py
def __init__(self, data: pl.DataFrame, name: str | None = None):
    """Builds an expression from a DataFrame of terms.

    Parameters:
        data:
            The terms of the expression. Must contain the `VAR_KEY` and `COEF_KEY` columns.
        name:
            Name forwarded to the base class. When `None`, a `UserWarning` is emitted
            and the base class is initialized without a name.

    Raises:
        AssertionError: If `data` lacks the `VAR_KEY` or the `COEF_KEY` column.
        ValueError: If the duplicate-label safety check is enabled and two rows are
            identical once the coefficient column is ignored.
    """
    columns = data.columns
    assert VAR_KEY in columns, "Missing variable column."
    assert COEF_KEY in columns, "Missing coefficient column."

    if Config.enable_is_duplicated_expression_safety_check:
        # Two rows that only differ by their coefficient share the same labels.
        # This is not expected to occur unless the library itself has a bug.
        is_repeated = data.drop(COEF_KEY).is_duplicated()
        if is_repeated.any():
            duplicated_data = data.filter(is_repeated)
            raise ValueError(
                f"Cannot create an expression with duplicate labels:\n{duplicated_data}."
            )

    data = _simplify_expr_df(data)

    if name is not None:
        super().__init__(data, name=name)
        return

    # Unnamed expressions are allowed, but the user is nudged to name them.
    warnings.warn(
        "Expression should be given a name to support troubleshooting.",
        UserWarning,
    )
    super().__init__(data)

constant_terms: pl.DataFrame

Returns all the constant terms in the expression.

is_quadratic: bool

Returns True if the expression is quadratic, False otherwise.

Computes in O(1) since expressions are quadratic if and only if self.data contains the QUAD_VAR_KEY column.

Examples:

>>> import pandas as pd
>>> m = pf.Model()
>>> m.v = Variable()
>>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}) * m.v
>>> expr *= m.v
>>> expr.is_quadratic
True

terms: int

The number of terms across all subexpressions.

Expressions equal to zero count as one term.

Examples:

>>> import polars as pl
>>> m = pf.Model()
>>> m.v = pf.Variable({"t": [1, 2]})
>>> coef = pl.DataFrame({"t": [1, 2], "coef": [0, 1]})
>>> coef * (m.v + 4)
<Expression height=2 terms=3 type=linear>
┌─────┬────────────┐
│ t   ┆ expression │
│ (2) ┆            │
╞═════╪════════════╡
│ 1   ┆ 0          │
│ 2   ┆ 4 + v[2]   │
└─────┴────────────┘
>>> (coef * (m.v + 4)).terms
3

variable_terms: pl.DataFrame

Returns all the non-constant terms in the expression.

constant(constant: int | float) -> Expression

Creates a new expression equal to the given constant.

Examples:

>>> pf.Expression.constant(5)
<Expression terms=1 type=constant>
5
Source code in pyoframe/_core.py
@classmethod
def constant(cls, constant: int | float) -> Expression:
    """Builds an expression made of a single constant term.

    Parameters:
        constant:
            The numeric value of the expression. It is stored as a `Float64`
            coefficient and `str(constant)` is used as the expression's name.

    Returns:
        A new expression whose only term is `constant`.

    Examples:
        >>> pf.Expression.constant(5)
        <Expression terms=1 type=constant>
        5
    """
    # One row: the coefficient paired with the constant-term marker.
    single_term = {COEF_KEY: [constant], VAR_KEY: [CONST_TERM]}
    term_schema = {COEF_KEY: pl.Float64, VAR_KEY: Config.id_dtype}
    term_frame = pl.DataFrame(single_term, schema=term_schema)
    return cls(term_frame, name=str(constant))

degree(return_str: bool = False) -> int | str

degree(return_str: Literal[False] = False) -> int
degree(return_str: Literal[True] = True) -> str

Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

Parameters:

Name Type Description Default
return_str bool

If True, returns the degree as a string ("constant", "linear", or "quadratic"). If False, returns the degree as an integer (0, 1, or 2).

False

Examples:

>>> import pandas as pd
>>> m = pf.Model()
>>> m.v1 = pf.Variable()
>>> m.v2 = pf.Variable()
>>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
>>> expr.degree()
0
>>> expr *= m.v1
>>> expr.degree()
1
>>> expr += (m.v2**2).over("dim1")
>>> expr.degree()
2
>>> expr.degree(return_str=True)
'quadratic'
Source code in pyoframe/_core.py
def degree(self, return_str: bool = False) -> int | str:
    """Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

    Parameters:
        return_str: If `True`, returns the degree as a string (`"constant"`, `"linear"`, or `"quadratic"`).
            If `False`, returns the degree as an integer (0, 1, or 2).

    Examples:
        >>> import pandas as pd
        >>> m = pf.Model()
        >>> m.v1 = pf.Variable()
        >>> m.v2 = pf.Variable()
        >>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
        >>> expr.degree()
        0
        >>> expr *= m.v1
        >>> expr.degree()
        1
        >>> expr += (m.v2**2).over("dim1")
        >>> expr.degree()
        2
        >>> expr.degree(return_str=True)
        'quadratic'
    """
    if self.is_quadratic:
        return "quadratic" if return_str else 2
    # TODO improve performance of .evaluate() by ensuring early exit if linear
    elif (self.data.get_column(VAR_KEY) != CONST_TERM).any():
        return "linear" if return_str else 1
    else:
        return "constant" if return_str else 0

evaluate() -> pl.DataFrame

Computes the value of the expression using the variables' solutions.

Returns:

Type Description
DataFrame

A Polars DataFrame for dimensioned expressions, or a float for dimensionless expressions.

Examples:

>>> m = pf.Model()
>>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
>>> m.expr = 2 * m.X * m.X + 1
>>> m.expr.evaluate()
Traceback (most recent call last):
...
ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().
>>> m.constant_expression = m.expr - 2 * m.X * m.X
>>> m.constant_expression.evaluate()
shape: (3, 2)
┌──────┬──────────┐
│ dim1 ┆ solution │
│ ---  ┆ ---      │
│ i64  ┆ f64      │
╞══════╪══════════╡
│ 1    ┆ 1.0      │
│ 2    ┆ 1.0      │
│ 3    ┆ 1.0      │
└──────┴──────────┘
>>> m.optimize()
>>> m.expr.evaluate()
shape: (3, 2)
┌──────┬──────────┐
│ dim1 ┆ solution │
│ ---  ┆ ---      │
│ i64  ┆ f64      │
╞══════╪══════════╡
│ 1    ┆ 201.0    │
│ 2    ┆ 201.0    │
│ 3    ┆ 201.0    │
└──────┴──────────┘
>>> m.expr.sum().evaluate()
603.0
Source code in pyoframe/_core.py
@unwrap_single_values
def evaluate(self) -> pl.DataFrame:
    """Computes the value of the expression using the variables' solutions.

    Constant expressions can be evaluated at any time. Any other expression
    can only be evaluated once the model has been optimized.

    Returns:
        A Polars `DataFrame` for dimensioned expressions, or a `float` for dimensionless expressions.

    Raises:
        AssertionError: If the expression is not attached to a model.
        ValueError: If the expression is not constant and `model.optimize()` has not been called yet.

    Examples:
        >>> m = pf.Model()
        >>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
        >>> m.expr = 2 * m.X * m.X + 1

        >>> m.expr.evaluate()
        Traceback (most recent call last):
        ...
        ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().

        >>> m.constant_expression = m.expr - 2 * m.X * m.X
        >>> m.constant_expression.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 1.0      │
        │ 2    ┆ 1.0      │
        │ 3    ┆ 1.0      │
        └──────┴──────────┘


        >>> m.optimize()
        >>> m.expr.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 201.0    │
        │ 2    ┆ 201.0    │
        │ 3    ┆ 201.0    │
        └──────┴──────────┘

        >>> m.expr.sum().evaluate()
        603.0

    """
    # The message names this method (it previously referred to a `.value` attribute).
    assert self._model is not None, (
        "Expression must be added to the model to use .evaluate()"
    )

    # The coefficient column becomes the running value of each term.
    df = self.data.rename({COEF_KEY: SOLUTION_KEY})
    sm = self._model.poi
    attr = poi.VariableAttribute.Value

    if self.degree() == 0:
        # Constant expressions need no solver values, which is why they can be
        # evaluated before the model is optimized.
        df = df.drop(self._variable_columns)
    elif (
        self._model.attr.TerminationStatus
        == poi.TerminationStatusCode.OPTIMIZE_NOT_CALLED
    ):
        raise ValueError(
            f"Cannot evaluate the expression '{self.name}' before calling model.optimize()."
        )
    else:
        # Multiply each term's running value by the solution of the variable
        # found in every variable column, dropping the column once it is used.
        for var_col in self._variable_columns:
            values = [
                sm.get_variable_attribute(poi.VariableIndex(v_id), attr)
                for v_id in df.get_column(var_col).to_list()
            ]

            df = df.drop(var_col).with_columns(
                pl.col(SOLUTION_KEY) * pl.Series(values, dtype=pl.Float64)
            )

    # Add up the terms, separately for each combination of dimensions if there are any.
    dims = self.dimensions
    if dims is not None:
        df = df.group_by(dims, maintain_order=Config.maintain_order)
    return df.sum()

map(mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression

Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.

This is particularly useful to go from one type of dimensions to another. For example, to convert data that is indexed by city to data indexed by country (see example).

Parameters:

Name Type Description Default
mapping_set SetTypes

The set to map the expression to. This can be a DataFrame, Index, or another Set.

required
drop_shared_dims bool

If True, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and repeated rows are summed. If False, the shared dimensions are kept in the resulting expression.

True

Returns:

Type Description
Expression

A new Expression containing the result of the mapping operation.

Examples:

>>> import polars as pl
>>> pop_data = pl.DataFrame(
...     {
...         "city": ["Toronto", "Vancouver", "Boston"],
...         "year": [2024, 2024, 2024],
...         "population": [10, 2, 8],
...     }
... ).to_expr()
>>> cities_and_countries = pl.DataFrame(
...     {
...         "city": ["Toronto", "Vancouver", "Boston"],
...         "country": ["Canada", "Canada", "USA"],
...     }
... )
>>> pop_data.map(cities_and_countries)
<Expression height=2 terms=2 type=constant>
┌──────┬─────────┬────────────┐
│ year ┆ country ┆ expression │
│ (1)  ┆ (2)     ┆            │
╞══════╪═════════╪════════════╡
│ 2024 ┆ Canada  ┆ 12         │
│ 2024 ┆ USA     ┆ 8          │
└──────┴─────────┴────────────┘
>>> pop_data.map(cities_and_countries, drop_shared_dims=False)
<Expression height=3 terms=3 type=constant>
┌───────────┬──────┬─────────┬────────────┐
│ city      ┆ year ┆ country ┆ expression │
│ (3)       ┆ (1)  ┆ (2)     ┆            │
╞═══════════╪══════╪═════════╪════════════╡
│ Toronto   ┆ 2024 ┆ Canada  ┆ 10         │
│ Vancouver ┆ 2024 ┆ Canada  ┆ 2          │
│ Boston    ┆ 2024 ┆ USA     ┆ 8          │
└───────────┴──────┴─────────┴────────────┘
Source code in pyoframe/_core.py
def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression:
    """Swaps the dimensions shared with `mapping_set` for the other dimensions of `mapping_set`.

    Use this to move from one kind of dimension to another, for instance to turn data
    indexed by city into data indexed by country (see example).

    Parameters:
        mapping_set:
            The set to map the expression to. This can be a DataFrame, Index, or another Set.
        drop_shared_dims:
            If `True`, the dimensions common to the expression and the mapping set are removed
                from the result and rows that become identical are summed.
            If `False`, the common dimensions are kept in the result.

    Returns:
        A new Expression containing the result of the mapping operation.

    Raises:
        ValueError: If the expression or the mapping set has no dimensions, or if they
            have no dimension in common.

    Examples:
        >>> import polars as pl
        >>> pop_data = pl.DataFrame(
        ...     {
        ...         "city": ["Toronto", "Vancouver", "Boston"],
        ...         "year": [2024, 2024, 2024],
        ...         "population": [10, 2, 8],
        ...     }
        ... ).to_expr()
        >>> cities_and_countries = pl.DataFrame(
        ...     {
        ...         "city": ["Toronto", "Vancouver", "Boston"],
        ...         "country": ["Canada", "Canada", "USA"],
        ...     }
        ... )
        >>> pop_data.map(cities_and_countries)
        <Expression height=2 terms=2 type=constant>
        ┌──────┬─────────┬────────────┐
        │ year ┆ country ┆ expression │
        │ (1)  ┆ (2)     ┆            │
        ╞══════╪═════════╪════════════╡
        │ 2024 ┆ Canada  ┆ 12         │
        │ 2024 ┆ USA     ┆ 8          │
        └──────┴─────────┴────────────┘

        >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
        <Expression height=3 terms=3 type=constant>
        ┌───────────┬──────┬─────────┬────────────┐
        │ city      ┆ year ┆ country ┆ expression │
        │ (3)       ┆ (1)  ┆ (2)     ┆            │
        ╞═══════════╪══════╪═════════╪════════════╡
        │ Toronto   ┆ 2024 ┆ Canada  ┆ 10         │
        │ Vancouver ┆ 2024 ┆ Canada  ┆ 2          │
        │ Boston    ┆ 2024 ┆ USA     ┆ 8          │
        └───────────┴──────┴─────────┴────────────┘
    """
    mapping_set = Set(mapping_set)

    own_dims = self.dimensions
    if own_dims is None:
        raise ValueError("Cannot use .map() on an expression with no dimensions.")

    target_dims = mapping_set.dimensions
    if target_dims is None:
        raise ValueError(
            "Cannot use .map() with a mapping set containing no dimensions."
        )

    # Dimensions present on both sides, in the expression's own order.
    overlap = [name for name in own_dims if name in target_dims]
    if len(overlap) == 0:
        raise ValueError(
            f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
        )

    # Multiplying by the set attaches the set's extra dimensions to the expression.
    result = self * mapping_set
    if drop_shared_dims:
        result = result.sum(*overlap)

    result.name = f"{self.name}.map(…)"
    return result

rolling_sum(over: str, window_size: int) -> Expression

Calculates the rolling sum of the Expression over a specified window size for a given dimension.

This method applies a rolling sum operation over the dimension specified by over, using a window defined by window_size.

Parameters:

Name Type Description Default
over str

The name of the dimension (column) over which the rolling sum is calculated. This dimension must exist within the Expression's dimensions.

required
window_size int

The size of the moving window in terms of number of records. The rolling sum is calculated over this many consecutive elements.

required

Returns:

Type Description
Expression

A new Expression instance containing the result of the rolling sum operation. This new Expression retains all dimensions (columns) of the original data, with the rolling sum applied over the specified dimension.

Examples:

>>> import polars as pl
>>> cost = pl.DataFrame(
...     {
...         "item": [1, 1, 1, 2, 2],
...         "time": [1, 2, 3, 1, 2],
...         "cost": [1, 2, 3, 4, 5],
...     }
... )
>>> m = pf.Model()
>>> m.quantity = pf.Variable(cost[["item", "time"]])
>>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
<Expression height=5 terms=8 type=linear>
┌──────┬──────┬──────────────────────────────────┐
│ item ┆ time ┆ expression                       │
│ (2)  ┆ (3)  ┆                                  │
╞══════╪══════╪══════════════════════════════════╡
│ 1    ┆ 1    ┆ quantity[1,1]                    │
│ 1    ┆ 2    ┆ quantity[1,1] +2 quantity[1,2]   │
│ 1    ┆ 3    ┆ 2 quantity[1,2] +3 quantity[1,3] │
│ 2    ┆ 1    ┆ 4 quantity[2,1]                  │
│ 2    ┆ 2    ┆ 4 quantity[2,1] +5 quantity[2,2] │
└──────┴──────┴──────────────────────────────────┘
Source code in pyoframe/_core.py
@return_new
def rolling_sum(self, over: str, window_size: int):
    """Calculates the rolling sum of the Expression over a specified window size for a given dimension.

    This method applies a rolling sum operation over the dimension specified by `over`,
    using a window defined by `window_size`.


    Parameters:
        over:
            The name of the dimension (column) over which the rolling sum is calculated.
            This dimension must exist within the Expression's dimensions.
        window_size:
            The size of the moving window in terms of number of records.
            The rolling sum is calculated over this many consecutive elements.

    Returns:
        A new Expression instance containing the result of the rolling sum operation.
            This new Expression retains all dimensions (columns) of the original data,
            with the rolling sum applied over the specified dimension.

    Raises:
        ValueError: If the expression has no dimensions.
        AssertionError: If `over` is not one of the expression's dimensions.

    Examples:
        >>> import polars as pl
        >>> cost = pl.DataFrame(
        ...     {
        ...         "item": [1, 1, 1, 2, 2],
        ...         "time": [1, 2, 3, 1, 2],
        ...         "cost": [1, 2, 3, 4, 5],
        ...     }
        ... )
        >>> m = pf.Model()
        >>> m.quantity = pf.Variable(cost[["item", "time"]])
        >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
        <Expression height=5 terms=8 type=linear>
        ┌──────┬──────┬──────────────────────────────────┐
        │ item ┆ time ┆ expression                       │
        │ (2)  ┆ (3)  ┆                                  │
        ╞══════╪══════╪══════════════════════════════════╡
        │ 1    ┆ 1    ┆ quantity[1,1]                    │
        │ 1    ┆ 2    ┆ quantity[1,1] +2 quantity[1,2]   │
        │ 1    ┆ 3    ┆ 2 quantity[1,2] +3 quantity[1,3] │
        │ 2    ┆ 1    ┆ 4 quantity[2,1]                  │
        │ 2    ┆ 2    ┆ 4 quantity[2,1] +5 quantity[2,2] │
        └──────┴──────┴──────────────────────────────────┘
    """
    dims = self.dimensions
    if dims is None:
        raise ValueError(
            "Cannot use rolling_sum() with an expression with no dimensions."
        )
    assert over in dims, f"Cannot sum over {over} as it is not in {dims}"
    # `over` is a single dimension name (a str), so it must be compared with `!=`.
    # A membership test (`dim not in over`) would be a substring check and would
    # wrongly discard any dimension whose name is contained in `over`
    # (e.g. a dimension "t" when rolling over "time").
    remaining_dims = [dim for dim in dims if dim != over]

    return pl.concat(
        [
            # Every row of a window is relabelled with the window's largest `over`
            # value, so that the terms of the window end up under that label.
            df.with_columns(pl.col(over).max())
            for _, df in self.data.rolling(
                index_column=over,
                period=f"{window_size}i",
                group_by=remaining_dims,
            )
        ]
    )

sum(*over: str) -> Expression

Sums an expression over specified dimensions.

If no dimensions are specified, the sum is taken over all of the expression's dimensions.

Examples:

>>> expr = pl.DataFrame(
...     {
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "place": [
...             "Toronto",
...             "Toronto",
...             "Toronto",
...             "Vancouver",
...             "Vancouver",
...         ],
...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
...     }
... ).to_expr()
>>> expr
<Expression height=5 terms=5 type=constant>
┌──────┬───────────┬────────────┐
│ time ┆ place     ┆ expression │
│ (3)  ┆ (2)       ┆            │
╞══════╪═══════════╪════════════╡
│ mon  ┆ Toronto   ┆ 1000000    │
│ tue  ┆ Toronto   ┆ 3000000    │
│ wed  ┆ Toronto   ┆ 2000000    │
│ mon  ┆ Vancouver ┆ 1000000    │
│ tue  ┆ Vancouver ┆ 2000000    │
└──────┴───────────┴────────────┘
>>> expr.sum("time")
<Expression height=2 terms=2 type=constant>
┌───────────┬────────────┐
│ place     ┆ expression │
│ (2)       ┆            │
╞═══════════╪════════════╡
│ Toronto   ┆ 6000000    │
│ Vancouver ┆ 3000000    │
└───────────┴────────────┘
>>> expr.sum()
<Expression terms=1 type=constant>
9000000

If the given dimensions don't exist, an error will be raised:

>>> expr.sum("city")
Traceback (most recent call last):
...
AssertionError: Cannot sum over ['city'] as it is not in ['time', 'place']
See Also

pyoframe.Expression.sum_by for summing over all dimensions except those that are specified.

Source code in pyoframe/_core.py
@return_new
def sum(self, *over: str):
    """Adds up an expression along the given dimensions.

    When called without arguments, every dimension of the expression is summed over.

    Examples:
        >>> expr = pl.DataFrame(
        ...     {
        ...         "time": ["mon", "tue", "wed", "mon", "tue"],
        ...         "place": [
        ...             "Toronto",
        ...             "Toronto",
        ...             "Toronto",
        ...             "Vancouver",
        ...             "Vancouver",
        ...         ],
        ...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
        ...     }
        ... ).to_expr()
        >>> expr
        <Expression height=5 terms=5 type=constant>
        ┌──────┬───────────┬────────────┐
        │ time ┆ place     ┆ expression │
        │ (3)  ┆ (2)       ┆            │
        ╞══════╪═══════════╪════════════╡
        │ mon  ┆ Toronto   ┆ 1000000    │
        │ tue  ┆ Toronto   ┆ 3000000    │
        │ wed  ┆ Toronto   ┆ 2000000    │
        │ mon  ┆ Vancouver ┆ 1000000    │
        │ tue  ┆ Vancouver ┆ 2000000    │
        └──────┴───────────┴────────────┘
        >>> expr.sum("time")
        <Expression height=2 terms=2 type=constant>
        ┌───────────┬────────────┐
        │ place     ┆ expression │
        │ (2)       ┆            │
        ╞═══════════╪════════════╡
        │ Toronto   ┆ 6000000    │
        │ Vancouver ┆ 3000000    │
        └───────────┴────────────┘
        >>> expr.sum()
        <Expression terms=1 type=constant>
        9000000

        If the given dimensions don't exist, an error will be raised:

        >>> expr.sum("city")
        Traceback (most recent call last):
        ...
        AssertionError: Cannot sum over ['city'] as it is not in ['time', 'place']

    See Also:
        [pyoframe.Expression.sum_by][] for summing over all dimensions _except_ those that are specified.
    """
    dims = self.dimensions
    if dims is None:
        raise ValueError("Cannot sum a dimensionless expression.")

    # No explicit dimensions means "sum over all of them".
    over = over or tuple(dims)
    assert set(over).issubset(dims), (
        f"Cannot sum over {list(over)} as it is not in {dims}"
    )

    without_summed_dims = self.data.drop(over)
    # Terms are merged when they agree on the dimensions that are kept and on their variables.
    kept_dims = [name for name in dims if name not in over]
    group_keys = kept_dims + self._variable_columns
    grouped = without_summed_dims.group_by(
        group_keys,
        maintain_order=Config.maintain_order,
    )
    return grouped.sum()

sum_by(*by: str)

Like Expression.sum, but the sum is taken over all dimensions except those specified in by (just like a group_by().sum() operation).

Examples:

>>> expr = pl.DataFrame(
...     {
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "place": [
...             "Toronto",
...             "Toronto",
...             "Toronto",
...             "Vancouver",
...             "Vancouver",
...         ],
...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
...     }
... ).to_expr()
>>> expr
<Expression height=5 terms=5 type=constant>
┌──────┬───────────┬────────────┐
│ time ┆ place     ┆ expression │
│ (3)  ┆ (2)       ┆            │
╞══════╪═══════════╪════════════╡
│ mon  ┆ Toronto   ┆ 1000000    │
│ tue  ┆ Toronto   ┆ 3000000    │
│ wed  ┆ Toronto   ┆ 2000000    │
│ mon  ┆ Vancouver ┆ 1000000    │
│ tue  ┆ Vancouver ┆ 2000000    │
└──────┴───────────┴────────────┘
>>> expr.sum_by("place")
<Expression height=2 terms=2 type=constant>
┌───────────┬────────────┐
│ place     ┆ expression │
│ (2)       ┆            │
╞═══════════╪════════════╡
│ Toronto   ┆ 6000000    │
│ Vancouver ┆ 3000000    │
└───────────┴────────────┘

If the specified dimensions don't exist, an error will be raised:

>>> expr.sum_by("city")
Traceback (most recent call last):
...
ValueError: Cannot sum by ['city'] because it is not a valid dimension. The expression's dimensions are: ['time', 'place'].
>>> total_sum = expr.sum()
>>> total_sum.sum_by("time")
Traceback (most recent call last):
...
ValueError: Cannot sum a dimensionless expression.
See Also

pyoframe.Expression.sum for summing over specified dimensions.

Source code in pyoframe/_core.py
def sum_by(self, *by: str):
    """Like [`Expression.sum`][pyoframe.Expression.sum], but the sum is taken over all dimensions *except* those specified in `by` (just like a `group_by().sum()` operation).

    If `by` lists every dimension of the expression, there is nothing left to sum over
    and the expression itself is returned unchanged.

    Parameters:
        by:
            The names of the dimensions to keep. At least one is required and each
            must be a dimension of the expression.

    Returns:
        The expression summed over every dimension not listed in `by`.

    Raises:
        ValueError: If `by` is empty, if the expression has no dimensions, or if `by`
            contains a name that is not a dimension of the expression.

    Examples:
        >>> expr = pl.DataFrame(
        ...     {
        ...         "time": ["mon", "tue", "wed", "mon", "tue"],
        ...         "place": [
        ...             "Toronto",
        ...             "Toronto",
        ...             "Toronto",
        ...             "Vancouver",
        ...             "Vancouver",
        ...         ],
        ...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
        ...     }
        ... ).to_expr()
        >>> expr
        <Expression height=5 terms=5 type=constant>
        ┌──────┬───────────┬────────────┐
        │ time ┆ place     ┆ expression │
        │ (3)  ┆ (2)       ┆            │
        ╞══════╪═══════════╪════════════╡
        │ mon  ┆ Toronto   ┆ 1000000    │
        │ tue  ┆ Toronto   ┆ 3000000    │
        │ wed  ┆ Toronto   ┆ 2000000    │
        │ mon  ┆ Vancouver ┆ 1000000    │
        │ tue  ┆ Vancouver ┆ 2000000    │
        └──────┴───────────┴────────────┘

        >>> expr.sum_by("place")
        <Expression height=2 terms=2 type=constant>
        ┌───────────┬────────────┐
        │ place     ┆ expression │
        │ (2)       ┆            │
        ╞═══════════╪════════════╡
        │ Toronto   ┆ 6000000    │
        │ Vancouver ┆ 3000000    │
        └───────────┴────────────┘

        If the specified dimensions don't exist, an error will be raised:

        >>> expr.sum_by("city")
        Traceback (most recent call last):
        ...
        ValueError: Cannot sum by ['city'] because it is not a valid dimension. The expression's dimensions are: ['time', 'place'].

        >>> total_sum = expr.sum()
        >>> total_sum.sum_by("time")
        Traceback (most recent call last):
        ...
        ValueError: Cannot sum a dimensionless expression.

    See Also:
        [pyoframe.Expression.sum][] for summing over specified dimensions.
    """
    if not by:
        raise ValueError("sum_by requires at least 1 argument.")
    dims = self.dimensions
    if dims is None:
        raise ValueError("Cannot sum a dimensionless expression.")

    # Unknown names are reported once each, in the order given, so that the error
    # message is deterministic (a set difference has no stable order).
    unknown = list(dict.fromkeys(dim for dim in by if dim not in dims))
    if unknown:
        raise ValueError(
            f"Cannot sum by {unknown} because it is not a valid dimension. The expression's dimensions are: {list(dims)}."
        )

    remaining_dims = [dim for dim in dims if dim not in by]
    if not remaining_dims:
        # Every dimension is kept, so there is nothing to sum over. This case must
        # not reach `self.sum()`: called without arguments it sums over *all*
        # dimensions, the opposite of what was requested.
        return self
    return self.sum(*remaining_dims)

to_expr() -> Expression

Returns the expression itself.

Source code in pyoframe/_core.py
def to_expr(self) -> Expression:
    """Returns the expression itself.

    The same object is returned; no copy is made.
    """
    return self

to_str(str_col_name: str = 'expression', include_const_term: bool = True, return_df: bool = False) -> str | pl.DataFrame

to_str(
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[False] = False,
) -> str
to_str(
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[True] = True,
) -> pl.DataFrame

Converts the expression to a human-readable string, or several arranged in a table.

Long expressions are truncated according to Config.print_max_terms and Config.print_polars_config.

str(pyoframe.Expression) is equivalent to pyoframe.Expression.to_str().

Parameters:

Name Type Description Default
str_col_name str

The name of the column containing the string representation of the expression (dimensioned expressions only).

'expression'
include_const_term bool

If False, constant terms are omitted from the string representation.

True
return_df bool

If True, returns a DataFrame containing the human-readable strings instead of the DataFrame's string representation.

False

Examples:

>>> import polars as pl
>>> m = pf.Model()
>>> x = pf.Set(x=range(1000))
>>> y = pf.Set(y=range(1000))
>>> m.V = pf.Variable(x, y)
>>> expr = 2 * m.V * m.V + 3
>>> print(expr.to_str())
┌────────┬────────┬──────────────────────────────┐
│ x      ┆ y      ┆ expression                   │
│ (1000) ┆ (1000) ┆                              │
╞════════╪════════╪══════════════════════════════╡
│ 0      ┆ 0      ┆ 3 +2 V[0,0] * V[0,0]         │
│ 0      ┆ 1      ┆ 3 +2 V[0,1] * V[0,1]         │
│ 0      ┆ 2      ┆ 3 +2 V[0,2] * V[0,2]         │
│ 0      ┆ 3      ┆ 3 +2 V[0,3] * V[0,3]         │
│ 0      ┆ 4      ┆ 3 +2 V[0,4] * V[0,4]         │
│ …      ┆ …      ┆ …                            │
│ 999    ┆ 995    ┆ 3 +2 V[999,995] * V[999,995] │
│ 999    ┆ 996    ┆ 3 +2 V[999,996] * V[999,996] │
│ 999    ┆ 997    ┆ 3 +2 V[999,997] * V[999,997] │
│ 999    ┆ 998    ┆ 3 +2 V[999,998] * V[999,998] │
│ 999    ┆ 999    ┆ 3 +2 V[999,999] * V[999,999] │
└────────┴────────┴──────────────────────────────┘
>>> expr = expr.sum("y")
>>> print(expr.to_str())
┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
│ x      ┆ expression                                                                              │
│ (1000) ┆                                                                                         │
╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
│ 0      ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …      │
│ 1      ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] …      │
│ 2      ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] …      │
│ 3      ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] …      │
│ 4      ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] …      │
│ …      ┆ …                                                                                       │
│ 995    ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
│        ┆ V[995,3] …                                                                              │
│ 996    ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
│        ┆ V[996,3] …                                                                              │
│ 997    ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
│        ┆ V[997,3] …                                                                              │
│ 998    ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
│        ┆ V[998,3] …                                                                              │
│ 999    ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
│        ┆ V[999,3] …                                                                              │
└────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
>>> expr = expr.sum("x")
>>> print(expr.to_str())
3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …
Source code in pyoframe/_core.py
def to_str(
    self,
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: bool = False,
) -> str | pl.DataFrame:
    """Renders the expression as human-readable text: a single string, or a table with one string per row.

    Long expressions are shortened; the limits are taken from [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and [`Config.print_polars_config`][pyoframe._Config.print_polars_config].

    `str(pyoframe.Expression)` is equivalent to `pyoframe.Expression.to_str()`.

    Parameters:
        str_col_name:
            Name given to the column that holds the rendered strings (only relevant for dimensioned expressions).
        include_const_term:
            Set to `False` to leave constant terms out of the rendered strings.
        return_df:
            Set to `True` to get back the DataFrame of rendered strings rather than its string representation.

    Examples:
        >>> import polars as pl
        >>> m = pf.Model()
        >>> x = pf.Set(x=range(1000))
        >>> y = pf.Set(y=range(1000))
        >>> m.V = pf.Variable(x, y)
        >>> expr = 2 * m.V * m.V + 3
        >>> print(expr.to_str())
        ┌────────┬────────┬──────────────────────────────┐
        │ x      ┆ y      ┆ expression                   │
        │ (1000) ┆ (1000) ┆                              │
        ╞════════╪════════╪══════════════════════════════╡
        │ 0      ┆ 0      ┆ 3 +2 V[0,0] * V[0,0]         │
        │ 0      ┆ 1      ┆ 3 +2 V[0,1] * V[0,1]         │
        │ 0      ┆ 2      ┆ 3 +2 V[0,2] * V[0,2]         │
        │ 0      ┆ 3      ┆ 3 +2 V[0,3] * V[0,3]         │
        │ 0      ┆ 4      ┆ 3 +2 V[0,4] * V[0,4]         │
        │ …      ┆ …      ┆ …                            │
        │ 999    ┆ 995    ┆ 3 +2 V[999,995] * V[999,995] │
        │ 999    ┆ 996    ┆ 3 +2 V[999,996] * V[999,996] │
        │ 999    ┆ 997    ┆ 3 +2 V[999,997] * V[999,997] │
        │ 999    ┆ 998    ┆ 3 +2 V[999,998] * V[999,998] │
        │ 999    ┆ 999    ┆ 3 +2 V[999,999] * V[999,999] │
        └────────┴────────┴──────────────────────────────┘
        >>> expr = expr.sum("y")
        >>> print(expr.to_str())
        ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
        │ x      ┆ expression                                                                              │
        │ (1000) ┆                                                                                         │
        ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
        │ 0      ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …      │
        │ 1      ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] …      │
        │ 2      ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] …      │
        │ 3      ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] …      │
        │ 4      ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] …      │
        │ …      ┆ …                                                                                       │
        │ 995    ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
        │        ┆ V[995,3] …                                                                              │
        │ 996    ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
        │        ┆ V[996,3] …                                                                              │
        │ 997    ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
        │        ┆ V[997,3] …                                                                              │
        │ 998    ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
        │        ┆ V[998,3] …                                                                              │
        │ 999    ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
        │        ┆ V[999,3] …                                                                              │
        └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
        >>> expr = expr.sum("x")
        >>> print(expr.to_str())
        3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …

    """
    # TODO consider optimizing using LazyFrames since .head() could maybe be automatically pushed up the chain of operations.
    terms = cast_coef_to_string(
        self.data if include_const_term else self.variable_terms
    )

    # Replace each variable-id column with printable variable names.
    for id_col in self._variable_columns:
        name_col = f"{id_col}_temp"
        var_map = None if self._model is None else self._model._var_map
        if var_map is None:
            # No name mapping available: fall back to "x" followed by the id.
            terms = terms.with_columns(
                pl.concat_str(pl.lit("x"), id_col).alias(name_col)
            )
        else:
            terms = var_map.apply(terms, to_col=name_col, id_col=id_col)

        # Constant terms carry no variable, so their name is left blank.
        printable_name = (
            pl.when(pl.col(id_col) == CONST_TERM)
            .then(pl.lit(""))
            .otherwise(pl.col(name_col))
            .alias(id_col)
        )
        terms = terms.with_columns(printable_name).drop(name_col)

    if self.is_quadratic:
        # Fold the second variable into the first as "a * b"; terms whose
        # second variable is blank keep just the first variable.
        product_name = pl.concat_str(VAR_KEY, pl.lit(" * "), pl.col(QUAD_VAR_KEY))
        merged_name = (
            pl.when(pl.col(QUAD_VAR_KEY) == "")
            .then(pl.col(VAR_KEY))
            .otherwise(product_name)
            .alias(VAR_KEY)
        )
        terms = terms.with_columns(merged_name).drop(QUAD_VAR_KEY)

    dimensions = self.dimensions

    # Build the text of each individual term: "<coefficient> <variable>".
    term_text = (
        pl.concat_str(COEF_KEY, pl.lit(" "), VAR_KEY)
        .str.strip_chars(characters="  ")
        .alias(str_col_name)
    )
    terms = terms.with_columns(term_text).drop(COEF_KEY, VAR_KEY)

    max_terms = Config.print_max_terms
    if dimensions is None:
        # Dimensionless expression: every term is joined into one string.
        is_truncated = terms.height > max_terms
        if is_truncated:
            terms = terms.head(max_terms)

        terms = terms.select(pl.col(str_col_name).str.join(delimiter=" "))

        if is_truncated:
            terms = terms.with_columns(
                pl.concat_str(pl.col(str_col_name), pl.lit(" …"))
            )
    else:
        # One string per combination of dimension labels, each capped at
        # max_terms terms and marked with an ellipsis when terms were cut.
        joined_terms = pl.col(str_col_name).head(max_terms).str.join(delimiter=" ")
        overflow_mark = (
            pl.when(pl.len() > max_terms).then(pl.lit(" …")).otherwise(pl.lit(""))
        )
        terms = terms.group_by(
            dimensions, maintain_order=Config.maintain_order
        ).agg(pl.concat_str(joined_terms, overflow_mark))

    # Remove leading +
    terms = terms.with_columns(pl.col(str_col_name).str.strip_chars(characters="  +"))

    if return_df:
        return terms

    if dimensions is None and not self._allowed_new_dims:
        # A single cell remains; hand it back as a plain string.
        return terms.item()

    table = self._add_shape_to_columns(terms)
    table = self._add_allowed_new_dims_to_df(table)
    with Config.print_polars_config:
        return repr(table)

within(set: SetTypes) -> Expression

Filters this expression, keeping only the entries whose labels on the shared dimensions appear in the provided set.

Examples:

>>> import pandas as pd
>>> general_expr = pd.DataFrame(
...     {"dim1": [1, 2, 3], "value": [1, 2, 3]}
... ).to_expr()
>>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
>>> general_expr.within(filter_expr).data
shape: (2, 3)
┌──────┬─────────┬───────────────┐
│ dim1 ┆ __coeff ┆ __variable_id │
│ ---  ┆ ---     ┆ ---           │
│ i64  ┆ f64     ┆ u32           │
╞══════╪═════════╪═══════════════╡
│ 1    ┆ 1.0     ┆ 0             │
│ 3    ┆ 3.0     ┆ 0             │
└──────┴─────────┴───────────────┘
Source code in pyoframe/_core.py
@return_new
def within(self, set: SetTypes):
    """Keeps only the entries of this expression whose labels appear in the provided set.

    The expression and the set are matched on the dimensions they have in
    common; entries whose labels on those dimensions are absent from the set
    are dropped.

    Parameters:
        set:
            The set to filter by. It is passed to `Set(...)` before use.

    Raises:
        AssertionError: If either the set or the expression has no dimensions.

    Examples:
        >>> import pandas as pd
        >>> general_expr = pd.DataFrame(
        ...     {"dim1": [1, 2, 3], "value": [1, 2, 3]}
        ... ).to_expr()
        >>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
        >>> general_expr.within(filter_expr).data
        shape: (2, 3)
        ┌──────┬─────────┬───────────────┐
        │ dim1 ┆ __coeff ┆ __variable_id │
        │ ---  ┆ ---     ┆ ---           │
        │ i64  ┆ f64     ┆ u32           │
        ╞══════╪═════════╪═══════════════╡
        │ 1    ┆ 1.0     ┆ 0             │
        │ 3    ┆ 3.0     ┆ 0             │
        └──────┴─────────┴───────────────┘
    """
    set_data: pl.DataFrame = Set(set).data
    set_dims = _get_dimensions(set_data)
    assert set_dims is not None, (
        "Cannot use .within() with a set with no dimensions."
    )
    own_dims = self.dimensions
    assert own_dims is not None, (
        "Cannot use .within() with an expression with no dimensions."
    )

    # Only the dimensions present on both sides take part in the filter.
    shared_dims = [name for name in own_dims if name in set_dims]

    # De-duplicate the set's labels so the join cannot multiply rows.
    allowed_labels = set_data.select(shared_dims).unique(
        maintain_order=Config.maintain_order
    )
    join_order = "left" if Config.maintain_order else None

    # The raw joined frame is returned here; presumably `@return_new` wraps
    # it into a new Expression (the example above reads `.data` from the result).
    return self.data.join(
        allowed_labels,
        on=shared_dims,
        maintain_order=join_order,
    )