# Source code for txgraffiti.heuristics.davila

import pandas as pd
from typing import List, Tuple, Union

from txgraffiti.logic import Inequality, Conjecture, KnowledgeTable

# Public API of this module: the names exported by a star-import.
__all__ = [
    'normalize_inequality_key',
    'same_conclusion',
    'is_strict_subset',
    'morgan_accept',
]


def normalize_inequality_key(ineq: Inequality) -> Tuple[str, str, str]:
    """
    Produce a canonical key for an inequality in "lhs <= rhs" form.

    Parameters
    ----------
    ineq : Inequality
        An inequality between two Properties, e.g. `P >= Q` or `P < Q`.
        Only its `lhs`, `op` and `rhs` attributes are read, together with
        the `name` of each operand.

    Returns
    -------
    key : tuple of str
        A 3-tuple `(lhs_name, "<=", rhs_name)`.  The key always uses the
        `<=` operator: for `>=`, `>` or `≥` the operands are swapped; for
        any other operator they are kept in their original order.

    Notes
    -----
    Strictness is not part of the key, so `P < Q` and `P <= Q` produce the
    same result.

    Examples
    --------
    >>> from txgraffiti.logic import Property, Inequality
    >>> from txgraffiti.heuristics.davila import normalize_inequality_key
    >>> P = Property('alpha', lambda df: df['alpha'])
    >>> Q = Property('beta', lambda df: df['beta'])
    >>> ineq1 = Inequality(P, '>=', Q)
    >>> normalize_inequality_key(ineq1)
    ('beta', '<=', 'alpha')
    """
    if ineq.op in (">=", ">", "≥"):
        # "lhs >= rhs" states the same bound as "rhs <= lhs": swap operands.
        smaller, larger = ineq.rhs, ineq.lhs
    else:
        # <=, < or ≤: operands are already in "smaller <= larger" order.
        smaller, larger = ineq.lhs, ineq.rhs
    return (smaller.name, "<=", larger.name)
def same_conclusion(a: Conjecture, b: Conjecture) -> bool:
    """
    Determine whether two conjectures share the same logical conclusion,
    up to flipping reversed inequalities.

    Parameters
    ----------
    a : Conjecture
        First conjecture whose conclusion is an Inequality.
    b : Conjecture
        Second conjecture whose conclusion is an Inequality.

    Returns
    -------
    bool
        True if their conclusions map to the same canonical key via
        `normalize_inequality_key`, i.e. they assert the same bound.

    Examples
    --------
    >>> from txgraffiti.logic import Predicate, Property, Conjecture, Inequality
    >>> from txgraffiti.heuristics.davila import same_conclusion
    >>> P = Predicate('connected', lambda df: df['connected'])
    >>> A = Property('alpha', lambda df: df['alpha'])
    >>> B = Property('beta', lambda df: df['beta'])
    >>> c1 = Conjecture(P, Inequality(A, '<=', B))
    >>> c2 = Conjecture(P, Inequality(B, '>=', A))
    >>> same_conclusion(c1, c2)
    True
    """
    key_a = normalize_inequality_key(a.conclusion)
    key_b = normalize_inequality_key(b.conclusion)
    return key_a == key_b
def is_strict_subset(m1: pd.Series, m2: pd.Series) -> bool:
    """
    Check whether boolean mask `m1` is a strict subset of mask `m2`.

    That is, every True in `m1` is also True in `m2`, and `m2` has strictly
    more True entries than `m1`.

    Parameters
    ----------
    m1 : pandas.Series of bool
        Candidate subset mask.
    m2 : pandas.Series of bool
        Candidate superset mask.

    Returns
    -------
    bool
        True if `m1 & ~m2` has no True values (so `m1` is contained in
        `m2`) and `m2.sum() > m1.sum()`.

    Examples
    --------
    >>> import pandas as pd
    >>> from txgraffiti.heuristics.davila import is_strict_subset
    >>> m1 = pd.Series([True, False, True])
    >>> m2 = pd.Series([True, True, True])
    >>> is_strict_subset(m1, m2)
    True
    >>> # not a strict subset if sums equal
    >>> is_strict_subset(m1, m1)
    False
    """
    # Entries selected by m1 but not by m2; any such entry breaks containment.
    outside_count = (m1 & ~m2).sum()
    if outside_count != 0:
        return False
    # Containment holds; it is strict only if m2 selects more entries.
    return bool(m2.sum() > m1.sum())
def morgan_accept(
    new_conj: Conjecture,
    existing: List[Conjecture],
    df: Union[pd.DataFrame, KnowledgeTable],
) -> bool:
    """
    Accept `new_conj` only if no existing conjecture with the same logical
    conclusion has a hypothesis mask that strictly contains `new_conj`'s mask.

    In other words, we reject `new_conj` if there is already a strictly more
    general conjecture (same bound but wider hypothesis coverage).

    Parameters
    ----------
    new_conj : Conjecture
        The candidate conjecture to test.
    existing : list of Conjecture
        Previously accepted conjectures to compare against.
    df : pandas.DataFrame or KnowledgeTable
        The data table on which to evaluate hypothesis masks.

    Returns
    -------
    bool
        True if no strictly more general existing conjecture was found,
        False otherwise.

    Examples
    --------
    >>> import pandas as pd
    >>> from txgraffiti.logic import Predicate, Property, Conjecture, Inequality
    >>> from txgraffiti.heuristics.davila import morgan_accept
    >>> df = pd.DataFrame({
    ...     'alpha': [1, 2, 3],
    ...     'beta': [3, 2, 1],
    ...     'connected': [True, True, True],
    ...     'tree': [False, True, False],
    ... })
    >>> P_gen = Predicate('connected', lambda df: df['connected'])
    >>> P_sub = Predicate('tree', lambda df: df['tree'])
    >>> A = Property('alpha', lambda df: df['alpha'])
    >>> B = Property('beta', lambda df: df['beta'])
    >>> # less general hypothesis on tree
    >>> c1 = P_sub >> (A <= B)
    >>> # more general hypothesis on connected
    >>> c2 = P_gen >> (A <= B)
    >>> # c2 covers strictly more rows → accept c2 but not c1 if c2 exists
    >>> morgan_accept(c1, [c2], df)
    False
    >>> morgan_accept(c2, [c1], df)
    True
    """
    # Evaluate the candidate's hypothesis once; it is reused for every
    # comparison below.
    candidate_mask = new_conj.hypothesis(df)

    # `and` short-circuits, so an existing conjecture's mask is only
    # evaluated when its conclusion matches the candidate's.
    dominated = any(
        same_conclusion(prior, new_conj)
        and is_strict_subset(candidate_mask, prior.hypothesis(df))
        for prior in existing
    )
    return not dominated