Expression

Bases: BaseOperableBlock

Represents a linear or quadratic mathematical expression.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame(
...     {
...         "item": [1, 1, 1, 2, 2],
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "cost": [1, 2, 3, 4, 5],
...     }
... ).set_index(["item", "time"])
>>> m = pf.Model()
>>> m.Time = pf.Variable(df.index)
>>> m.Size = pf.Variable(df.index)
>>> expr = df["cost"] * m.Time + df["cost"] * m.Size
>>> expr
<Expression (linear) height=5 terms=10>
┌──────┬──────┬──────────────────────────────┐
│ item ┆ time ┆ expression                   │
│ (2)  ┆ (3)  ┆                              │
╞══════╪══════╪══════════════════════════════╡
│ 1    ┆ mon  ┆ Time[1,mon] + Size[1,mon]    │
│ 1    ┆ tue  ┆ 2 Time[1,tue] +2 Size[1,tue] │
│ 1    ┆ wed  ┆ 3 Time[1,wed] +3 Size[1,wed] │
│ 2    ┆ mon  ┆ 4 Time[2,mon] +4 Size[2,mon] │
│ 2    ┆ tue  ┆ 5 Time[2,tue] +5 Size[2,tue] │
└──────┴──────┴──────────────────────────────┘

Methods:

Name	Description
`constant`	Creates a new expression equal to the given constant.
`degree`	Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).
`evaluate`	Computes the value of the expression using the variables' solutions.
`map`	Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.
`rolling_sum`	Calculates the rolling sum of the Expression over a specified window size for a given dimension.
`sum`	Sums an expression over specified dimensions.
`sum_by`	Like `Expression.sum`, but the sum is taken over all dimensions except those specified in `by` (just like a `group_by().sum()` operation).
`to_expr`	Returns the expression itself.
`to_str`	Converts the expression to a human-readable string, or several arranged in a table.
`within`	Filters this expression to only include the dimensions within the provided set.

Attributes:

Name	Type	Description
`constant_terms`	`DataFrame`	Returns all the constant terms in the expression.
`is_quadratic`	`bool`	Returns `True` if the expression is quadratic, False otherwise.
`terms`	`int`	The number of terms across all subexpressions.
`variable_terms`	`DataFrame`	Returns all the non-constant terms in the expression.

Source code in pyoframe/_core.py

def __init__(self, data: pl.DataFrame, name: str | None = None):
    # The variable and coefficient columns are mandatory for every expression.
    for required_col, complaint in (
        (VAR_KEY, "Missing variable column."),
        (COEF_KEY, "Missing coefficient column."),
    ):
        assert required_col in data.columns, complaint

    # Optional (configurable) guard: two rows may not be identical in every
    # column other than the coefficient.
    if Config.enable_is_duplicated_expression_safety_check:
        is_repeated = data.drop(COEF_KEY).is_duplicated()
        # Not expected to trigger unless the library itself has a bug.
        if is_repeated.any():
            duplicated_data = data.filter(is_repeated)
            raise ValueError(
                f"Cannot create an expression with duplicate labels:\n{duplicated_data}."
            )

    data = _simplify_expr_df(data)

    if name is not None:
        super().__init__(data, name=name)
        return

    # Unnamed expressions are allowed but discouraged; the parent initializer
    # is then called without a name so that its own default applies.
    warnings.warn(
        "Expression should be given a name to support troubleshooting.",
        UserWarning,
    )
    super().__init__(data)

`constant_terms: pl.DataFrame`

Returns all the constant terms in the expression.

`is_quadratic: bool`

Returns True if the expression is quadratic, False otherwise.

Computes in O(1) since an expression is quadratic if and only if `self.data` contains the `QUAD_VAR_KEY` column.

Examples:

>>> import pandas as pd
>>> m = pf.Model()
>>> m.v = Variable()
>>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}) * m.v
>>> expr *= m.v
>>> expr.is_quadratic
True

`terms: int`

The number of terms across all subexpressions.

Expressions equal to zero count as one term.

Examples:

>>> import polars as pl
>>> m = pf.Model()
>>> m.v = pf.Variable({"t": [1, 2]})
>>> coef = pl.DataFrame({"t": [1, 2], "coef": [0, 1]})
>>> coef * (m.v + 4)
<Expression (linear) height=2 terms=3>
┌─────┬────────────┐
│ t   ┆ expression │
│ (2) ┆            │
╞═════╪════════════╡
│ 1   ┆ 0          │
│ 2   ┆ 4 + v[2]   │
└─────┴────────────┘
>>> (coef * (m.v + 4)).terms
3

`variable_terms: pl.DataFrame`

Returns all the non-constant terms in the expression.

`constant(constant: int | float) -> Expression`

Creates a new expression equal to the given constant.

Examples:

>>> pf.Expression.constant(5)
<Expression (parameter) terms=1>
5

Source code in pyoframe/_core.py

@classmethod
def constant(cls, constant: int | float) -> Expression:
    """Builds a new expression whose single term is the given constant.

    Parameters:
        constant:
            The numeric value of the expression. It is stored as a `Float64` coefficient
            and its string form is used as the expression's name.

    Examples:
        >>> pf.Expression.constant(5)
        <Expression (parameter) terms=1>
        5
    """
    # One row only: the coefficient carries the value and the variable id is
    # the marker reserved for constant terms.
    single_term = pl.DataFrame(
        {COEF_KEY: [constant], VAR_KEY: [CONST_TERM]},
        schema={COEF_KEY: pl.Float64, VAR_KEY: Config.id_dtype},
    )
    return cls(single_term, name=str(constant))

`degree(return_str: bool = False) -> int | str`

degree(return_str: Literal[False] = False) -> int

degree(return_str: Literal[True] = True) -> str

Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

Parameters:

Name	Type	Description	Default
`return_str`	`bool`	If `True`, returns the degree as a string (`"parameter"`, `"linear"`, or `"quadratic"`). If `False`, returns the degree as an integer (0, 1, or 2).	`False`

Examples:

>>> m = pf.Model()
>>> m.v1 = pf.Variable()
>>> m.v2 = pf.Variable()
>>> expr = pf.Param({"dim1": [1, 2, 3], "value": [1, 2, 3]})
>>> expr.degree()
0
>>> expr *= m.v1
>>> expr.degree()
1
>>> expr += (m.v2**2).over("dim1")
>>> expr.degree()
2
>>> expr.degree(return_str=True)
'quadratic'

Source code in pyoframe/_core.py

def degree(self, return_str: bool = False) -> int | str:
    """Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

    Parameters:
        return_str: If `True`, returns the degree as a string (`"constant"`, `"linear"`, or `"quadratic"`).
            If `False`, returns the degree as an integer (0, 1, or 2).

    Examples:
        >>> m = pf.Model()
        >>> m.v1 = pf.Variable()
        >>> m.v2 = pf.Variable()
        >>> expr = pf.Param({"dim1": [1, 2, 3], "value": [1, 2, 3]})
        >>> expr.degree()
        0
        >>> expr *= m.v1
        >>> expr.degree()
        1
        >>> expr += (m.v2**2).over("dim1")
        >>> expr.degree()
        2
        >>> expr.degree(return_str=True)
        'quadratic'
    """
    if self.is_quadratic:
        return "quadratic" if return_str else 2
    # TODO improve performance of .evaluate() by ensuring early exit if linear
    elif (self.data.get_column(VAR_KEY) != CONST_TERM).any():
        return "linear" if return_str else 1
    else:
        return "parameter" if return_str else 0

`evaluate() -> pl.DataFrame`

Computes the value of the expression using the variables' solutions.

Returns:

Type	Description
`DataFrame`	A Polars `DataFrame` for dimensioned expressions or a `float` for dimensionless expressions.

Examples:

>>> m = pf.Model()
>>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
>>> m.expr = 2 * m.X * m.X + 1

>>> m.expr.evaluate()
Traceback (most recent call last):
...
ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().

>>> m.constant_expression = m.expr - 2 * m.X * m.X
>>> m.constant_expression.evaluate()
shape: (3, 2)
┌──────┬──────────┐
│ dim1 ┆ solution │
│ ---  ┆ ---      │
│ i64  ┆ f64      │
╞══════╪══════════╡
│ 1    ┆ 1.0      │
│ 2    ┆ 1.0      │
│ 3    ┆ 1.0      │
└──────┴──────────┘

>>> m.optimize()
>>> m.expr.evaluate()
shape: (3, 2)
┌──────┬──────────┐
│ dim1 ┆ solution │
│ ---  ┆ ---      │
│ i64  ┆ f64      │
╞══════╪══════════╡
│ 1    ┆ 201.0    │
│ 2    ┆ 201.0    │
│ 3    ┆ 201.0    │
└──────┴──────────┘

>>> m.expr.sum().evaluate()
603.0

Source code in pyoframe/_core.py

@unwrap_single_values
def evaluate(self) -> pl.DataFrame:
    """Computes the value of the expression using the variables' solutions.

    Returns:
        A Polars `DataFrame` for dimensioned expressions or a `float` for dimensionless expressions.

    Raises:
        ValueError: If the expression contains variables and `model.optimize()` has not been called yet.

    Examples:
        >>> m = pf.Model()
        >>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
        >>> m.expr = 2 * m.X * m.X + 1

        >>> m.expr.evaluate()
        Traceback (most recent call last):
        ...
        ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().

        >>> m.constant_expression = m.expr - 2 * m.X * m.X
        >>> m.constant_expression.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 1.0      │
        │ 2    ┆ 1.0      │
        │ 3    ┆ 1.0      │
        └──────┴──────────┘


        >>> m.optimize()
        >>> m.expr.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 201.0    │
        │ 2    ┆ 201.0    │
        │ 3    ┆ 201.0    │
        └──────┴──────────┘

        >>> m.expr.sum().evaluate()
        603.0

    """
    # The message names the public method the caller actually invoked.
    assert self._model is not None, (
        "Expression must be added to the model to use .evaluate()"
    )

    # Each term's coefficient is the starting point of its evaluated value.
    df = self.data.rename({COEF_KEY: SOLUTION_KEY})
    sm = self._model.poi
    attr = poi.VariableAttribute.Value

    if self.degree() == 0:
        # Purely constant expressions need no solver values: just discard the
        # variable id column(s).
        df = df.drop(self._variable_columns)
    elif (
        self._model.attr.TerminationStatus
        == poi.TerminationStatusCode.OPTIMIZE_NOT_CALLED
    ):
        raise ValueError(
            f"Cannot evaluate the expression '{self.name}' before calling model.optimize()."
        )
    else:
        # For every variable column, look up each variable's solved value and
        # multiply it into the term's running product.
        for var_col in self._variable_columns:
            values = [
                sm.get_variable_attribute(poi.VariableIndex(v_id), attr)
                for v_id in df.get_column(var_col).to_list()
            ]

            df = df.drop(var_col).with_columns(
                pl.col(SOLUTION_KEY) * pl.Series(values, dtype=pl.Float64)
            )

    # Add up the terms, per combination of dimensions when there are any.
    dims = self.dimensions
    if dims is not None:
        df = df.group_by(dims, maintain_order=Config.maintain_order)
    return df.sum()

`map(mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression`

Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.

This is particularly useful to go from one type of dimensions to another. For example, to convert data that is indexed by city to data indexed by country (see example).

Parameters:

Name	Type	Description	Default
`mapping_set`	`SetTypes`	The set to map the expression to. This can be a DataFrame, Index, or another Set.	required
`drop_shared_dims`	`bool`	If `True`, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and repeated rows are summed. If `False`, the shared dimensions are kept in the resulting expression.	`True`

Returns:

Type	Description
`Expression`	A new Expression containing the result of the mapping operation.

Examples:

>>> import polars as pl
>>> pop_data = pf.Param(
...     {
...         "city": ["Toronto", "Vancouver", "Boston"],
...         "year": [2024, 2024, 2024],
...         "population": [10, 2, 8],
...     }
... )
>>> cities_and_countries = pl.DataFrame(
...     {
...         "city": ["Toronto", "Vancouver", "Boston"],
...         "country": ["Canada", "Canada", "USA"],
...     }
... )
>>> pop_data.map(cities_and_countries)
<Expression (parameter) height=2 terms=2>
┌──────┬─────────┬────────────┐
│ year ┆ country ┆ expression │
│ (1)  ┆ (2)     ┆            │
╞══════╪═════════╪════════════╡
│ 2024 ┆ Canada  ┆ 12         │
│ 2024 ┆ USA     ┆ 8          │
└──────┴─────────┴────────────┘

>>> pop_data.map(cities_and_countries, drop_shared_dims=False)
<Expression (parameter) height=3 terms=3>
┌───────────┬──────┬─────────┬────────────┐
│ city      ┆ year ┆ country ┆ expression │
│ (3)       ┆ (1)  ┆ (2)     ┆            │
╞═══════════╪══════╪═════════╪════════════╡
│ Toronto   ┆ 2024 ┆ Canada  ┆ 10         │
│ Vancouver ┆ 2024 ┆ Canada  ┆ 2          │
│ Boston    ┆ 2024 ┆ USA     ┆ 8          │
└───────────┴──────┴─────────┴────────────┘

Source code in pyoframe/_core.py

def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression:
    """Swaps the dimensions shared with `mapping_set` for the other dimensions found in `mapping_set`.

    Handy for moving from one kind of index to another, e.g. turning data indexed by city
    into data indexed by country (see example).

    Parameters:
        mapping_set:
            The set to map the expression to. This can be a DataFrame, Index, or another Set.
        drop_shared_dims:
            If `True`, the dimensions shared between the expression and the mapping set are
            summed over and therefore absent from the result.
            If `False`, the shared dimensions are kept in the resulting expression.

    Returns:
        A new Expression containing the result of the mapping operation.

    Raises:
        ValueError: If the expression or the mapping set has no dimensions, or if they share no dimension.

    Examples:
        >>> import polars as pl
        >>> pop_data = pf.Param(
        ...     {
        ...         "city": ["Toronto", "Vancouver", "Boston"],
        ...         "year": [2024, 2024, 2024],
        ...         "population": [10, 2, 8],
        ...     }
        ... )
        >>> cities_and_countries = pl.DataFrame(
        ...     {
        ...         "city": ["Toronto", "Vancouver", "Boston"],
        ...         "country": ["Canada", "Canada", "USA"],
        ...     }
        ... )
        >>> pop_data.map(cities_and_countries)
        <Expression (parameter) height=2 terms=2>
        ┌──────┬─────────┬────────────┐
        │ year ┆ country ┆ expression │
        │ (1)  ┆ (2)     ┆            │
        ╞══════╪═════════╪════════════╡
        │ 2024 ┆ Canada  ┆ 12         │
        │ 2024 ┆ USA     ┆ 8          │
        └──────┴─────────┴────────────┘

        >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
        <Expression (parameter) height=3 terms=3>
        ┌───────────┬──────┬─────────┬────────────┐
        │ city      ┆ year ┆ country ┆ expression │
        │ (3)       ┆ (1)  ┆ (2)     ┆            │
        ╞═══════════╪══════╪═════════╪════════════╡
        │ Toronto   ┆ 2024 ┆ Canada  ┆ 10         │
        │ Vancouver ┆ 2024 ┆ Canada  ┆ 2          │
        │ Boston    ┆ 2024 ┆ USA     ┆ 8          │
        └───────────┴──────┴─────────┴────────────┘
    """
    # Normalise whatever was passed in (DataFrame, Index, ...) into a Set.
    mapping_set = Set(mapping_set)

    own_dims = self.dimensions
    if own_dims is None:
        raise ValueError("Cannot use .map() on an expression with no dimensions.")

    set_dims = mapping_set.dimensions
    if set_dims is None:
        raise ValueError(
            "Cannot use .map() with a mapping set containing no dimensions."
        )

    # Dimensions present on both sides, in the expression's own order.
    shared_dims = [name for name in own_dims if name in set_dims]
    if len(shared_dims) == 0:
        raise ValueError(
            f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
        )

    # Multiplying by the set brings in the mapping set's other dimensions.
    result = self * mapping_set
    if drop_shared_dims:
        # Collapse the shared dimensions so only the new ones remain.
        result = result.sum(*shared_dims)

    result.name = f"{self.name}.map(…)"
    return result

`rolling_sum(over: str, window_size: int) -> Expression`

Calculates the rolling sum of the Expression over a specified window size for a given dimension.

This method applies a rolling sum operation over the dimension specified by over, using a window defined by window_size.

Parameters:

Name	Type	Description	Default
`over`	`str`	The name of the dimension (column) over which the rolling sum is calculated. This dimension must exist within the Expression's dimensions.	required
`window_size`	`int`	The size of the moving window in terms of number of records. The rolling sum is calculated over this many consecutive elements.	required

Returns:

Type	Description
`Expression`	A new Expression instance containing the result of the rolling sum operation. This new Expression retains all dimensions (columns) of the original data, with the rolling sum applied over the specified dimension.

Examples:

>>> import polars as pl
>>> cost = pl.DataFrame(
...     {
...         "item": [1, 1, 1, 2, 2],
...         "time": [1, 2, 3, 1, 2],
...         "cost": [1, 2, 3, 4, 5],
...     }
... )
>>> m = pf.Model()
>>> m.quantity = pf.Variable(cost[["item", "time"]])
>>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
<Expression (linear) height=5 terms=8>
┌──────┬──────┬──────────────────────────────────┐
│ item ┆ time ┆ expression                       │
│ (2)  ┆ (3)  ┆                                  │
╞══════╪══════╪══════════════════════════════════╡
│ 1    ┆ 1    ┆ quantity[1,1]                    │
│ 1    ┆ 2    ┆ quantity[1,1] +2 quantity[1,2]   │
│ 1    ┆ 3    ┆ 2 quantity[1,2] +3 quantity[1,3] │
│ 2    ┆ 1    ┆ 4 quantity[2,1]                  │
│ 2    ┆ 2    ┆ 4 quantity[2,1] +5 quantity[2,2] │
└──────┴──────┴──────────────────────────────────┘

Source code in pyoframe/_core.py

@return_new
def rolling_sum(self, over: str, window_size: int):
    """Calculates the rolling sum of the Expression over a specified window size for a given dimension.

    This method applies a rolling sum operation over the dimension specified by `over`,
    using a window defined by `window_size`.


    Parameters:
        over:
            The name of the dimension (column) over which the rolling sum is calculated.
            This dimension must exist within the Expression's dimensions.
        window_size:
            The size of the moving window in terms of number of records.
            The rolling sum is calculated over this many consecutive elements.

    Returns:
        A new Expression instance containing the result of the rolling sum operation.
            This new Expression retains all dimensions (columns) of the original data,
            with the rolling sum applied over the specified dimension.

    Raises:
        ValueError: If the expression has no dimensions.
        AssertionError: If `over` is not one of the expression's dimensions.

    Examples:
        >>> import polars as pl
        >>> cost = pl.DataFrame(
        ...     {
        ...         "item": [1, 1, 1, 2, 2],
        ...         "time": [1, 2, 3, 1, 2],
        ...         "cost": [1, 2, 3, 4, 5],
        ...     }
        ... )
        >>> m = pf.Model()
        >>> m.quantity = pf.Variable(cost[["item", "time"]])
        >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
        <Expression (linear) height=5 terms=8>
        ┌──────┬──────┬──────────────────────────────────┐
        │ item ┆ time ┆ expression                       │
        │ (2)  ┆ (3)  ┆                                  │
        ╞══════╪══════╪══════════════════════════════════╡
        │ 1    ┆ 1    ┆ quantity[1,1]                    │
        │ 1    ┆ 2    ┆ quantity[1,1] +2 quantity[1,2]   │
        │ 1    ┆ 3    ┆ 2 quantity[1,2] +3 quantity[1,3] │
        │ 2    ┆ 1    ┆ 4 quantity[2,1]                  │
        │ 2    ┆ 2    ┆ 4 quantity[2,1] +5 quantity[2,2] │
        └──────┴──────┴──────────────────────────────────┘
    """
    dims = self.dimensions
    if dims is None:
        raise ValueError(
            "Cannot use rolling_sum() with an expression with no dimensions."
        )
    assert over in dims, f"Cannot sum over {over} as it is not in {dims}"

    # Compare names for equality. `over` is a string, so `dim not in over`
    # would be a substring test and would wrongly drop e.g. a dimension "t"
    # from the grouping keys when rolling over "time".
    remaining_dims = [dim for dim in dims if dim != over]

    # Each window is relabelled with its last (largest) `over` value so that
    # all terms inside the window land on the same row of the result.
    return pl.concat(
        [
            df.with_columns(pl.col(over).max())
            for _, df in self.data.rolling(
                index_column=over,
                period=f"{window_size}i",
                group_by=remaining_dims,
            )
        ]
    )

`sum(*over: str) -> Expression`

Sums an expression over specified dimensions.

If no dimensions are specified, the sum is taken over all of the expression's dimensions.

Examples:

>>> expr = pf.Param(
...     {
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "place": [
...             "Toronto",
...             "Toronto",
...             "Toronto",
...             "Vancouver",
...             "Vancouver",
...         ],
...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
...     }
... )
>>> expr
<Expression (parameter) height=5 terms=5>
┌──────┬───────────┬────────────┐
│ time ┆ place     ┆ expression │
│ (3)  ┆ (2)       ┆            │
╞══════╪═══════════╪════════════╡
│ mon  ┆ Toronto   ┆ 1000000    │
│ tue  ┆ Toronto   ┆ 3000000    │
│ wed  ┆ Toronto   ┆ 2000000    │
│ mon  ┆ Vancouver ┆ 1000000    │
│ tue  ┆ Vancouver ┆ 2000000    │
└──────┴───────────┴────────────┘
>>> expr.sum("time")
<Expression (parameter) height=2 terms=2>
┌───────────┬────────────┐
│ place     ┆ expression │
│ (2)       ┆            │
╞═══════════╪════════════╡
│ Toronto   ┆ 6000000    │
│ Vancouver ┆ 3000000    │
└───────────┴────────────┘
>>> expr.sum()
<Expression (parameter) terms=1>
9000000

If the given dimensions don't exist, an error will be raised:

>>> expr.sum("city")
Traceback (most recent call last):
...
AssertionError: Cannot sum over ['city'] as it is not in ['time', 'place']

`sum_by(*by: str)`

Like Expression.sum, but the sum is taken over all dimensions except those specified in by (just like a group_by().sum() operation).

Examples:

>>> expr = pf.Param(
...     {
...         "time": ["mon", "tue", "wed", "mon", "tue"],
...         "place": [
...             "Toronto",
...             "Toronto",
...             "Toronto",
...             "Vancouver",
...             "Vancouver",
...         ],
...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
...     }
... )
>>> expr
<Expression (parameter) height=5 terms=5>
┌──────┬───────────┬────────────┐
│ time ┆ place     ┆ expression │
│ (3)  ┆ (2)       ┆            │
╞══════╪═══════════╪════════════╡
│ mon  ┆ Toronto   ┆ 1000000    │
│ tue  ┆ Toronto   ┆ 3000000    │
│ wed  ┆ Toronto   ┆ 2000000    │
│ mon  ┆ Vancouver ┆ 1000000    │
│ tue  ┆ Vancouver ┆ 2000000    │
└──────┴───────────┴────────────┘

>>> expr.sum_by("place")
<Expression (parameter) height=2 terms=2>
┌───────────┬────────────┐
│ place     ┆ expression │
│ (2)       ┆            │
╞═══════════╪════════════╡
│ Toronto   ┆ 6000000    │
│ Vancouver ┆ 3000000    │
└───────────┴────────────┘

If the specified dimensions don't exist, an error will be raised:

>>> expr.sum_by("city")
Traceback (most recent call last):
...
ValueError: Cannot sum by ['city'] because it is not a valid dimension. The expression's dimensions are: ['time', 'place'].

>>> total_sum = expr.sum()
>>> total_sum.sum_by("time")
Traceback (most recent call last):
...
ValueError: Cannot sum a dimensionless expression.

`to_expr() -> Expression`

Returns the expression itself.

Source code in pyoframe/_core.py

def to_expr(self) -> Expression:
    """Returns this very object, since it is already an `Expression`."""
    return self

`to_str(str_col_name: str = 'expression', include_const_term: bool = True, return_df: bool = False) -> str | pl.DataFrame`

to_str(
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[False] = False,
) -> str

to_str(
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[True] = True,
) -> pl.DataFrame

Converts the expression to a human-readable string, or several arranged in a table.

Long expressions are truncated according to Config.print_max_terms and Config.print_polars_config.

str(pyoframe.Expression) is equivalent to pyoframe.Expression.to_str().

Parameters:

Name	Type	Description	Default
`str_col_name`	`str`	The name of the column containing the string representation of the expression (dimensioned expressions only).	`'expression'`
`include_const_term`	`bool`	If `False`, constant terms are omitted from the string representation.	`True`
`return_df`	`bool`	If `True`, returns a DataFrame containing the human-readable strings instead of the DataFrame's string representation.	`False`

Examples:

>>> import polars as pl
>>> m = pf.Model()
>>> x = pf.Set(x=range(1000))
>>> y = pf.Set(y=range(1000))
>>> m.V = pf.Variable(x, y)
>>> expr = 2 * m.V * m.V + 3
>>> print(expr.to_str())
┌────────┬────────┬──────────────────────────────┐
│ x      ┆ y      ┆ expression                   │
│ (1000) ┆ (1000) ┆                              │
╞════════╪════════╪══════════════════════════════╡
│ 0      ┆ 0      ┆ 3 +2 V[0,0] * V[0,0]         │
│ 0      ┆ 1      ┆ 3 +2 V[0,1] * V[0,1]         │
│ 0      ┆ 2      ┆ 3 +2 V[0,2] * V[0,2]         │
│ 0      ┆ 3      ┆ 3 +2 V[0,3] * V[0,3]         │
│ 0      ┆ 4      ┆ 3 +2 V[0,4] * V[0,4]         │
│ …      ┆ …      ┆ …                            │
│ 999    ┆ 995    ┆ 3 +2 V[999,995] * V[999,995] │
│ 999    ┆ 996    ┆ 3 +2 V[999,996] * V[999,996] │
│ 999    ┆ 997    ┆ 3 +2 V[999,997] * V[999,997] │
│ 999    ┆ 998    ┆ 3 +2 V[999,998] * V[999,998] │
│ 999    ┆ 999    ┆ 3 +2 V[999,999] * V[999,999] │
└────────┴────────┴──────────────────────────────┘
>>> expr = expr.sum("y")
>>> print(expr.to_str())
┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
│ x      ┆ expression                                                                              │
│ (1000) ┆                                                                                         │
╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
│ 0      ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …      │
│ 1      ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] …      │
│ 2      ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] …      │
│ 3      ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] …      │
│ 4      ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] …      │
│ …      ┆ …                                                                                       │
│ 995    ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
│        ┆ V[995,3] …                                                                              │
│ 996    ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
│        ┆ V[996,3] …                                                                              │
│ 997    ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
│        ┆ V[997,3] …                                                                              │
│ 998    ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
│        ┆ V[998,3] …                                                                              │
│ 999    ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
│        ┆ V[999,3] …                                                                              │
└────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
>>> expr = expr.sum("x")
>>> print(expr.to_str())
3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …

Source code in pyoframe/_core.py

def to_str(
    self,
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: bool = False,
) -> str | pl.DataFrame:
    """Renders the expression as human-readable text: a single string, or a table with one string per row.

    Every term is written as its coefficient followed by its variable label(s). Expressions with many terms are cut off according to [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and tables are printed using [`Config.print_polars_config`][pyoframe._Config.print_polars_config].

    `str(pyoframe.Expression)` is equivalent to `pyoframe.Expression.to_str()`.

    Parameters:
        str_col_name:
            Name of the column that holds the rendered strings (dimensioned expressions only).
        include_const_term:
            Set to `False` to leave constant terms out of the rendered strings.
        return_df:
            Set to `True` to get the DataFrame of rendered strings back instead of its printed representation.

    Examples:
        >>> import polars as pl
        >>> m = pf.Model()
        >>> x = pf.Set(x=range(1000))
        >>> y = pf.Set(y=range(1000))
        >>> m.V = pf.Variable(x, y)
        >>> expr = 2 * m.V * m.V + 3
        >>> print(expr.to_str())
        ┌────────┬────────┬──────────────────────────────┐
        │ x      ┆ y      ┆ expression                   │
        │ (1000) ┆ (1000) ┆                              │
        ╞════════╪════════╪══════════════════════════════╡
        │ 0      ┆ 0      ┆ 3 +2 V[0,0] * V[0,0]         │
        │ 0      ┆ 1      ┆ 3 +2 V[0,1] * V[0,1]         │
        │ 0      ┆ 2      ┆ 3 +2 V[0,2] * V[0,2]         │
        │ 0      ┆ 3      ┆ 3 +2 V[0,3] * V[0,3]         │
        │ 0      ┆ 4      ┆ 3 +2 V[0,4] * V[0,4]         │
        │ …      ┆ …      ┆ …                            │
        │ 999    ┆ 995    ┆ 3 +2 V[999,995] * V[999,995] │
        │ 999    ┆ 996    ┆ 3 +2 V[999,996] * V[999,996] │
        │ 999    ┆ 997    ┆ 3 +2 V[999,997] * V[999,997] │
        │ 999    ┆ 998    ┆ 3 +2 V[999,998] * V[999,998] │
        │ 999    ┆ 999    ┆ 3 +2 V[999,999] * V[999,999] │
        └────────┴────────┴──────────────────────────────┘
        >>> expr = expr.sum("y")
        >>> print(expr.to_str())
        ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
        │ x      ┆ expression                                                                              │
        │ (1000) ┆                                                                                         │
        ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
        │ 0      ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …      │
        │ 1      ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] …      │
        │ 2      ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] …      │
        │ 3      ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] …      │
        │ 4      ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] …      │
        │ …      ┆ …                                                                                       │
        │ 995    ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
        │        ┆ V[995,3] …                                                                              │
        │ 996    ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
        │        ┆ V[996,3] …                                                                              │
        │ 997    ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
        │        ┆ V[997,3] …                                                                              │
        │ 998    ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
        │        ┆ V[998,3] …                                                                              │
        │ 999    ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
        │        ┆ V[999,3] …                                                                              │
        └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
        >>> expr = expr.sum("x")
        >>> print(expr.to_str())
        3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …

    """
    # TODO consider optimizing using LazyFrames since .head() could maybe be automatically pushed up the chain of operations.
    terms = cast_coef_to_string(
        self.data if include_const_term else self.variable_terms
    )

    # Swap every variable-id column for a printable label (blank for the constant term).
    for id_col in self._variable_columns:
        label_col = f"{id_col}_temp"
        if self._model is None or self._model._var_map is None:
            # No variable map to consult: fall back to "x" followed by the raw id.
            fallback_label = pl.concat_str(pl.lit("x"), id_col).alias(label_col)
            terms = terms.with_columns(fallback_label)
        else:
            terms = self._model._var_map.apply(
                terms, to_col=label_col, id_col=id_col
            )
        is_const = pl.col(id_col) == CONST_TERM
        printable = (
            pl.when(is_const).then(pl.lit("")).otherwise(label_col).alias(id_col)
        )
        terms = terms.with_columns(printable).drop(label_col)

    # Quadratic terms: fold the second variable into the first as "a * b".
    if self.is_quadratic:
        second_is_blank = pl.col(QUAD_VAR_KEY) == ""
        product = pl.concat_str(VAR_KEY, pl.lit(" * "), pl.col(QUAD_VAR_KEY))
        merged_var = (
            pl.when(second_is_blank)
            .then(pl.col(VAR_KEY))
            .otherwise(product)
            .alias(VAR_KEY)
        )
        terms = terms.with_columns(merged_var).drop(QUAD_VAR_KEY)

    dimensions = self.dimensions

    # One string per term: "<coefficient> <variable>", trimmed of stray whitespace.
    term_text = (
        pl.concat_str(COEF_KEY, pl.lit(" "), VAR_KEY)
        .str.strip_chars(characters="  ")
        .alias(str_col_name)
    )
    terms = terms.with_columns(term_text).drop(COEF_KEY, VAR_KEY)

    if dimensions is None:
        # Undimensioned: all terms collapse into a single string.
        joined = pl.col(str_col_name).str.join(delimiter=" ")
        if terms.height > Config.print_max_terms:
            terms = (
                terms.head(Config.print_max_terms)
                .select(joined)
                .with_columns(pl.concat_str(pl.col(str_col_name), pl.lit(" …")))
            )
        else:
            terms = terms.select(joined)
    else:
        # Dimensioned: one string per group, with an ellipsis where terms were cut off.
        shown_terms = (
            pl.col(str_col_name)
            .head(Config.print_max_terms)
            .str.join(delimiter=" ")
        )
        overflow_marker = (
            pl.when(pl.len() > Config.print_max_terms)
            .then(pl.lit(" …"))
            .otherwise(pl.lit(""))
        )
        terms = terms.group_by(
            dimensions, maintain_order=Config.maintain_order
        ).agg(pl.concat_str(shown_terms, overflow_marker))

    # Remove leading +
    terms = terms.with_columns(
        pl.col(str_col_name).str.strip_chars(characters="  +")
    )

    if return_df:
        return terms

    if dimensions is None and not self._allowed_new_dims:
        # A lone scalar expression is returned as a plain string.
        return terms.item()

    table = self._add_shape_to_columns(terms)
    table = self._add_allowed_new_dims_to_df(table)
    with Config.print_polars_config:
        return repr(table)

`within(set: SetTypes) -> Expression`

Filters this expression to only include the entries whose dimension labels are present in the provided set.

Examples:

>>> general_expr = pf.Param({"dim1": [1, 2, 3], "value": [1, 2, 3]})
>>> filter_expr = pf.Param({"dim1": [1, 3], "value": [5, 6]})
>>> general_expr.within(filter_expr).data
shape: (2, 3)
┌──────┬─────────┬───────────────┐
│ dim1 ┆ __coeff ┆ __variable_id │
│ ---  ┆ ---     ┆ ---           │
│ i64  ┆ f64     ┆ u32           │
╞══════╪═════════╪═══════════════╡
│ 1    ┆ 1.0     ┆ 0             │
│ 3    ┆ 3.0     ┆ 0             │
└──────┴─────────┴───────────────┘

Source code in pyoframe/_core.py

@return_new
def within(self, set: SetTypes):
    """Keeps only the entries of this expression whose labels also appear in the provided set.

    The comparison is made over the dimensions that the expression and the set have in common.

    Parameters:
        set:
            The set (or set-like object) whose labels decide which entries are kept.

    Raises:
        AssertionError: If either the set or the expression has no dimensions.

    Examples:
        >>> general_expr = pf.Param({"dim1": [1, 2, 3], "value": [1, 2, 3]})
        >>> filter_expr = pf.Param({"dim1": [1, 3], "value": [5, 6]})
        >>> general_expr.within(filter_expr).data
        shape: (2, 3)
        ┌──────┬─────────┬───────────────┐
        │ dim1 ┆ __coeff ┆ __variable_id │
        │ ---  ┆ ---     ┆ ---           │
        │ i64  ┆ f64     ┆ u32           │
        ╞══════╪═════════╪═══════════════╡
        │ 1    ┆ 1.0     ┆ 0             │
        │ 3    ┆ 3.0     ┆ 0             │
        └──────┴─────────┴───────────────┘
    """
    set_frame: pl.DataFrame = Set(set).data
    set_dims = _get_dimensions(set_frame)
    assert set_dims is not None, (
        "Cannot use .within() with a set with no dimensions."
    )
    own_dims = self.dimensions
    assert own_dims is not None, (
        "Cannot use .within() with an expression with no dimensions."
    )

    # Shared dimensions, listed in the expression's own order.
    shared_dims = [name for name in own_dims if name in set_dims]

    # Distinct label combinations of the set over the shared dimensions.
    allowed_labels = set_frame.select(shared_dims).unique(
        maintain_order=Config.maintain_order
    )

    # Joining on the shared dimensions drops every entry without a match in the set.
    join_order = "left" if Config.maintain_order else None
    return self.data.join(
        allowed_labels,
        on=shared_dims,
        maintain_order=join_order,
    )

Expression

constant_terms: pl.DataFrame

is_quadratic: bool

terms: int

variable_terms: pl.DataFrame

constant(constant: int | float) -> Expression

degree(return_str: bool = False) -> int | str

evaluate() -> pl.DataFrame

map(mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression

rolling_sum(over: str, window_size: int) -> Expression

sum(*over: str) -> Expression

sum_by(*by: str)

to_expr() -> Expression

to_str(str_col_name: str = 'expression', include_const_term: bool = True, return_df: bool = False) -> str | pl.DataFrame

within(set: SetTypes) -> Expression

Comments

`constant_terms: pl.DataFrame`

`is_quadratic: bool`

`terms: int`

`variable_terms: pl.DataFrame`

`constant(constant: int | float) -> Expression`

`degree(return_str: bool = False) -> int | str`

`evaluate() -> pl.DataFrame`

`map(mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression`

`rolling_sum(over: str, window_size: int) -> Expression`

`sum(*over: str) -> Expression`

`sum_by(*by: str)`

`to_expr() -> Expression`

`to_str(str_col_name: str = 'expression', include_const_term: bool = True, return_df: bool = False) -> str | pl.DataFrame`

`within(set: SetTypes) -> Expression`