# -*- coding: utf-8 -*-
"""openepda.expr.py
This file contains a parser of analytic expressions.
The parser can be used to parse the analytic expressions and thus
check their correctness in a safe way, and also to evaluate the
expressions.
The grammar is intended to be very similar to the tinyexpr library
grammar (https://github.com/codeplea/tinyexpr).
Author: Dima Pustakhod
Copyright: 2020--2022, TU/e -- PITC and authors
"""
import re
from collections import Counter
from functools import lru_cache, partial
from importlib import import_module
from itertools import chain
from keyword import kwlist
from numbers import Real
from operator import contains
from typing import (
Any,
ByteString,
Dict,
Iterable,
List,
MutableSequence,
Sequence,
Union,
)
import tatsu
from more_itertools import collapse
from tatsu.exceptions import FailedParse
# grammar symbols
OPERATORS = tuple("+-*/^%")
BRACKETS = tuple("()")
LIST_SEPARATOR = tuple(",")
LETTERS_LOWER = tuple("abcdefghijklmnopqrstuvwxyz")
LETTERS_UPPER = tuple(l.upper() for l in LETTERS_LOWER)
LETTERS_ALL = LETTERS_LOWER + LETTERS_UPPER
DIGITS = tuple("0123465789")
UNDERSCORE = tuple("_")
VARIABLE_NAME = LETTERS_ALL + UNDERSCORE + DIGITS
SPACE = tuple(" ")
ALLOWED_SYMBOLS = (
LETTERS_ALL
+ DIGITS
+ BRACKETS
+ LIST_SEPARATOR
+ UNDERSCORE
+ SPACE
+ OPERATORS
)
# template includes placeholders for optional items. In the comments are
# examples of what should be in the placeholder.
GRAMMAR_TEMPLATE = r"""
@@grammar :: openepda_expr
@@whitespace :: / /
start = list $ ;
list = expr ',' list | expr ;
expr = expr {{('+' | '-') term}} | term ;
term = term '%' factor | term '*' factor | term '/' factor | factor ;
factor = factor "{power_op}" power | power ; # association right or left
power = '+' power | '-' power | base ;
base = number
{func2_base} # | func2 '(' expr ',' expr ')'
{func1_base} # | func1 '(' expr ')'
{allow_skip_parenth} # | func1 power # optional
{allow_parenth_after_const} # | const '(' ')' # optional
{const_base} # | const # optional
| var
| "(" list ")" ;
number = /\d*\.\d+[eE][+-]?\d+/
| /\d+\.?[eE][+-]?\d+/
| /\d*\.\d+/
| /\d+\.?/ ;
{const_list} # const = 'e' | 'pi' ;
{func1_list} # func1 = 'abs' | 'acos' | 'asin' | 'atan' | 'ceil' | 'cos'
# | 'cosh' |'exp' | 'fac' | 'floor' | 'ln' | 'log' | 'log10'
# | 'sin' |'sinh' | 'sqrt' | 'tan' | 'tanh' ;
{func2_list} # func2 = 'atan2' | 'pow' | 'npr' | 'ncr' ;
# var = /[a-zA-Z][a-zA-Z0-9_]*/ ;
var = {exclude} var:/[a-zA-Z][a-zA-Z0-9_]*/;
"""
CHOICES_TEMPLATE = "{name} = {choices_str} ;"
EXCLUDE_TEMPLATE = "! ( {choices_str} )"
# Predefined identifiers
CONSTANTS_MAP = {"pi": ("math", "pi"), "e": ("math", "e")}
CONSTANTS = tuple(CONSTANTS_MAP.keys())
# Mapping of functions available in the expressions, 1 and 2 parameters
STANDARD_FCNS_1_ARG_MAP = {
"abs": ("math", "fabs"),
"acos": ("math", "acos"),
"asin": ("math", "asin"),
"atan": ("math", "atan"),
"ceil": ("math", "ceil"),
"cos": ("math", "cos"),
"cosh": ("math", "cosh"),
"exp": ("math", "exp"),
"fac": ("math", "factorial"),
"floor": ("math", "floor"),
"ln": ("math", "log"),
# "log": ("math", "log"), # option to use log10
"log10": ("math", "log10"),
"sin": ("math", "sin"),
"sinh": ("math", "sinh"),
"sqrt": ("math", "sqrt"),
"tan": ("math", "tan"),
"tanh": ("math", "tanh"),
}
STANDARD_FCNS_1_ARG = tuple(STANDARD_FCNS_1_ARG_MAP.keys())
STANDARD_FCNS_2_ARG_MAP = {
"atan2": ("math", "atan2"),
"pow": ("math", "pow"),
"ncr": ("scipy.special", "comb"),
"npr": ("scipy.special", "perm"),
}
STANDARD_FCNS_2_ARG = tuple(STANDARD_FCNS_2_ARG_MAP.keys())
[docs]class OpenEpdaError(Exception):
pass
[docs]class OpenEpdaExpressionError(Exception):
pass
class _BaseParser(object):
KW_SUFFIX = "_KW"
def __init__(
self,
allow_skip_parenth=False,
use_nat_log=False,
power_from_right=True,
use_caret_for_power=True,
constants=None,
functions_1=None,
functions_2=None,
allow_parenth_after_const=False,
parameters=(),
exclude=(),
):
"""Create a parser.
`constants`, `functions_1`, `functions_2` are dictionaries in the
form: {'expression_name': ('module_name', 'module_attribute')}.
`parameters` are just an iterable with variable identifiers.
Their values are submitted to the eval method.
Parameters
----------
allow_skip_parenth : bool
if True, in expressions with function of one argument
(e.g. 'sin 0.5') parentheses can be omitted. If False,
parentheses are required. Note, that if this option is True,
parsing of the expression is possible, but evaluation is not
currently supported.
use_nat_log : bool
if True, `log` evaluates to the natural logarithm. Otherwise,
it is log10.
power_from_right : bool
if True, the following association rules are used for
exponentiation: `a^b^c == a^(b^c)` and `-a^b == -(a^b)`. This is
default Python behavior. If False, the following rules are used:
`a^b^c == (a^b)^c` and `-a^b == (-a)^b`. Note, that if this
option is True, parsing of the expression is possible, but
evaluation is not currently supported.
use_caret_for_power : bool
if True, caret symbol '^' will be used for power operation.
Otherwise, a standard python operator '**' is used.
constants : dict
constants to be allowed in the expression.
functions_1 : dict
list of functions of 1 argument to be allowed in the expression.
functions_2 : dict
list of functions of 2 arguments to be allowed in the expression.
parameters : list
list of parameter identifiers to be allowed in the expression.
exclude : iterable of str
identifiers to be excluded from the final list
"""
# parsing options
self._allow_parenth_after_const = allow_parenth_after_const
self._use_caret_for_power = use_caret_for_power
self._allow_skip_parenth = allow_skip_parenth # not used
self._power_from_right = power_from_right # not used
# evaluation option
self._use_nat_log = use_nat_log
self._exclude = exclude
constants = constants or {}
functions_1 = functions_1 or {}
functions_2 = functions_2 or {}
# member variables
self._parser = None
self._imported_symbols = {}
# import and add symbols
self._check_symbols(constants.keys())
added_symbols = self._add_symbols(constants, exclude=self._exclude)
self._constants = added_symbols
self._check_symbols(functions_1.keys())
added_symbols = self._add_symbols(functions_1, exclude=self._exclude)
self._f1 = added_symbols
self._check_symbols(functions_2.keys())
added_symbols = self._add_symbols(functions_2, exclude=self._exclude)
self._f2 = added_symbols
self._check_symbols(parameters)
self._vars = parameters
# create parser
self.compile_parser()
def _check_symbols(self, symbols):
"""Check if the symbols are allowed in the expression
The check will not pass if:
- there are repetitions inside symbols
- any symbol was already defined before in this parser
Parameters
----------
symbols : iterable of str
Returns
-------
True
Raises
------
ValueError:
if the check fails.
"""
if len(symbols) == 0:
return True
# check there is no repeated symbols
repeated_symb, no_repetitions = Counter(symbols).most_common()[0]
if no_repetitions > 1:
raise OpenEpdaExpressionError(
'Symbol "{}" is defined more than once ({} times).'.format(
repeated_symb, no_repetitions
)
)
# Check each symbol
for s in symbols:
if s in self._imported_symbols.keys():
# already defined symbols
raise OpenEpdaExpressionError(
'Symbol "{}" is already defined.'.format(s)
)
return True
def _clean_keywords(self, expr, param_values):
param_values = param_values.copy()
for kw in kwlist:
expr, n_repl = re.subn(
r"\b{}\b".format(kw), "{}{}".format(kw, self.KW_SUFFIX), expr
)
if n_repl:
v = param_values.pop(kw)
param_values.update({"{}{}".format(kw, self.KW_SUFFIX): v})
return expr, param_values
@property
def reserved_identifiers(self):
return tuple(self._imported_symbols)
def _add_symbols(self, symbols, exclude=()):
"""
Parameters
----------
symbols : dict
exclude : iterable of str
Returns
-------
tuple of str
list of imported symbols
"""
res = make_symbol_mapping(symbols, exclude=exclude)
self._imported_symbols.update(res)
return tuple(res.keys())
def compile_parser(self):
"""Create a parser.
This is an expensive action, so preferably has to be done only
once. Parsing itself is much faster.
"""
# Create data to fill grammar template
# lists of symbols
const_list = make_grammar_choice_item("const", self._constants)
func1_list = make_grammar_choice_item("func1", self._f1)
func2_list = make_grammar_choice_item("func2", self._f2)
const_base = r"| const" if const_list else ""
func1_base = r"| func1 '(' expr ')'" if func1_list else ""
func2_base = r"| func2 '(' expr ',' expr ')'" if func2_list else ""
exclude = make_grammar_exclude_item(
kwlist, self._constants, self._f1, self._f2
)
# optional items
if self._allow_skip_parenth and func1_list:
allow_skip_parenth = "| func1 power"
else:
allow_skip_parenth = ""
if self._allow_parenth_after_const and const_list:
allow_parenth_after_const = "| const '(' ')'"
else:
allow_parenth_after_const = ""
power_op = "^" if self._use_caret_for_power else "**"
grammar = GRAMMAR_TEMPLATE.format(
power_op=power_op,
const_base=const_base,
const_list=const_list,
func1_base=func1_base,
func1_list=func1_list,
func2_base=func2_base,
func2_list=func2_list,
allow_skip_parenth=allow_skip_parenth,
allow_parenth_after_const=allow_parenth_after_const,
exclude=exclude,
)
# print(grammar)
self._parser = tatsu.compile(grammar)
@lru_cache(maxsize=1000)
def parse(self, expr):
"""Parse the expression.
Parameters
----------
expr : str
expression to be parsed
Returns
-------
tatsu.contexts.closure
AST for the expression
"""
if not self._parser:
raise OpenEpdaExpressionError("Parser has not been created yet.")
if expr == "":
raise OpenEpdaExpressionError("Cannot parse empty expression")
try:
ast = self._parser.parse(expr)
except FailedParse as e:
raise OpenEpdaExpressionError(
"error parsing expression '{}': {}.".format(expr, e)
)
return ast
def get_parameter_names(self, expr: str):
"""Parse expression and get the names of parameters used in it.
Parameters
----------
expr : str
expression to the parsed
Returns
-------
set of str
set of parameter names present in the expression
"""
# Replace all numeric values by 666. This is done to make
# better use of parser cache: for the task of checking
# parameter names it does not matter which numeric values are
# there.
# several digits followed by optional decimal part
pattern = r"\b\d+\.?\d*\b"
repl = "666"
expr = re.sub(pattern, repl, expr)
# Clean the keywords
# expr = self._clean_keywords(expr)
ast = self.parse(expr)
flat_ast = collapse(ast, base_type=dict)
param_names = {
item["var"] for item in flat_ast if isinstance(item, dict)
}
return param_names
def eval(self, expr, param_values: Dict[str, Any] = None):
"""
Parameters
----------
expr : str
expression to be evaluated
param_values : Dict[str, Any]
{'parameter_name': parameter_value}
Returns
-------
Any
evaluation result
"""
if not self._power_from_right:
raise NotImplementedError(
"Evaluations of expressions with left association of "
"exponentiation is not implemented."
)
if self._allow_skip_parenth:
raise NotImplementedError(
"Evaluations of expressions with missing parentheses "
"for a function of one argument is not implemented."
)
param_values = param_values or {}
self.check_missing_params(expr, frozenset(param_values.keys()))
if self._allow_parenth_after_const:
for c in self._constants:
expr = re.sub(r"{}\( *\)".format(c), "{}".format(c), expr)
if self._use_caret_for_power:
expr = expr.replace("^", "**")
expr, param_values = self._clean_keywords(expr, param_values)
res = eval(expr, self._imported_symbols, param_values)
return res
def check_missing_params(self, expr, params: Iterable[str]):
"""Verify that all params that are present in the expr are present.
Parameters
----------
expr : str
params : Iterable of str
allowed parameters
"""
actual_param_names = self.get_parameter_names(expr)
missing_params = actual_param_names - set(params)
if missing_params:
raise OpenEpdaExpressionError(
"expression '{}' contains undefined params: {}.".format(
expr, missing_params
)
)
else:
return True
[docs]def make_choice_list(*choices, exclude=()):
"""Combine choices into a single list with unique items.
Some of the choices can be excluded.
Parameters
----------
choices : iterable of str
each iterable contains a list of functions to be included into
the symbols.
exclude: iterable of str
list of symbols to be excluded.
Returns
-------
list of str
sorted list of symbols, where all choices are united, and
strings from exclude parameter are removed.
"""
choices_clean = sorted(set(chain.from_iterable(choices)))
ch = list(c for c in choices_clean if c not in exclude)
return ch
[docs]def make_symbol_mapping(mapping, exclude=()):
"""Import all symbols from the mapping using their name
Parameters
----------
mapping : Dict[str, Tuple(str, str)]
keys are symbol names, values are 2-tuples. value[0] is a
module name, value[1] is attribute name to be imported from this
module and to be used under symbol name.
exclude : iterable of str
identifiers to be excluded from the final list
Returns
-------
Dict[str, Any]
keys are imported symbol names, values are imported entities,
e.g. function, parameters, etc.
"""
res = {}
for symbol_name, (module_name, attr) in mapping.items():
if symbol_name in exclude:
continue
m = import_module(module_name)
try:
symbol = getattr(m, attr)
except AttributeError as e:
raise ValueError(
"Incorrect data for symbol '{}', cannot import it: {}.".format(
symbol_name, e
)
)
res.update({symbol_name: symbol})
return res
[docs]def make_choice_str(choices=None):
"""
Parameters
----------
choices : iterable of str
Returns
-------
str
"""
if choices:
choices_str = "' | '".join(choices)
if choices_str != "":
choices_str = "'{}'".format(choices_str)
else:
choices_str = ""
return choices_str
def make_grammar_choice_item(name="", choices=None):
choices_str = make_choice_str(choices)
if choices_str != "":
res = CHOICES_TEMPLATE.format(name=name, choices_str=choices_str)
else:
res = ""
return res
def make_grammar_exclude_item(*choices):
choices_flat = chain.from_iterable(choices)
choices_str = make_choice_str(choices_flat)
if choices_str != "":
res = EXCLUDE_TEMPLATE.format(choices_str=choices_str)
else:
res = ""
return res
[docs]def check_parameter_name(name, reserved=()):
"""Check if name can be used as identifier in the expression.
- Contains letters, underscores, or digits
- Starts with a letter
- Is not one of the reserved identifiers
Parameters
----------
name : str
identifier to be checked
reserved : iterable of str
identifiers which are reserved and are therefore illegal,
e.g. for other functions.
Returns
-------
bool
True if name is a correct identifier
"""
if not (name[0] in LETTERS_LOWER or name[0] in LETTERS_UPPER):
# First symbol
raise OpenEpdaExpressionError(
'First symbol of the parameter name "{}" is not one of allowed'
" symbols.".format(name)
)
elif set(name) - set(VARIABLE_NAME):
# Other symbols
raise OpenEpdaExpressionError(
'Parameter name "{}" contains symbol(s) which is (are) not '
"letters or an underscore.".format(name)
)
elif name in reserved:
raise OpenEpdaExpressionError(
'Parameter name "{}" is a reserved identifier.'.format(name)
)
else:
return True
def check_expr(expr):
if not isinstance(expr, str):
raise TypeError(
"Expression is not a str type. It is {}.".format(type(expr))
)
expr = expr.replace(" ", "") # remove whitespaces
if not all(map(partial(contains, ALLOWED_SYMBOLS), expr)):
raise OpenEpdaExpressionError("Expression contains illegal symbols.")
else:
return True
openepda_parser = _BaseParser(
functions_1=STANDARD_FCNS_1_ARG_MAP,
functions_2=STANDARD_FCNS_2_ARG_MAP,
constants=CONSTANTS_MAP,
# exclude="ln",
)
TNestedSequence = Union[str, Real, Sequence["TNestedSequence"]]
TNestedRealSequence = Union[Real, Sequence["TNestedRealSequence"]]
TNestedRealList = Union[Real, List["TNestedRealList"]]
[docs]def evaluate_expression(
expr: TNestedSequence, param_values: Dict[str, Any] = None
) -> Union[Real, TNestedRealList]:
"""Evaluate expression if necessary and return a numeric value.
Parameters
----------
expr : TNestedSequence
expression to be checked and evaluated if needed. Can be a sequence
of expressions, or a sequence of sequences...
param_values : Optional[Dict[str, Any]]
Parameter values to be used during expression evaluation.
We do not restrict parameter values to the Real type, however,
this could make sense. It depends on whether the functions used in the
expression are able to produce numeric output for non-numeric inputs.
Returns
-------
res : Union[Real, TNestedRealList]
the input expression if it is a number, or the expression evaluation
result if it is a number.
Raises
------
TypeError
In cases when the expression cannot be evaluated, or when it returned
a non-numeric result.
"""
if isinstance(expr, Real):
res = expr
elif isinstance(expr, str):
res = openepda_parser.eval(expr, param_values=param_values)
if not isinstance(res, Real):
raise TypeError(
"Expression '{}' was evaluated, and a non-numeric type {} "
"was returned. Evaluation result: {}.".format(
expr, type(res), res
)
)
elif isinstance(expr, (MutableSequence, list)):
res = [evaluate_expression(e, param_values=param_values) for e in expr]
elif isinstance(expr, tuple):
res = tuple(
evaluate_expression(e, param_values=param_values) for e in expr
)
elif isinstance(expr, Sequence) and not isinstance(ByteString):
res = tuple(
evaluate_expression(e, param_values=param_values) for e in expr
)
else:
raise TypeError(
"Provided input '{}' has type {}. It is not a string or a number, "
"and therefore cannot be evaluated as a numeric value. ".format(
expr, type(expr)
)
)
return res