Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion backend/api/modules/declarative_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from backend.celonis_connection.celonis_connection_manager import (
CelonisConnectionManager,
)
from backend.pql_queries import declarative_queries

# **************** Type Aliases ****************

Expand All @@ -33,6 +34,7 @@ async def compute_declarative_constraints(
request: Request,
min_support: float = Query(0.3, description="Minimum support ratio"),
min_confidence: float = Query(0.75, description="Minimum confidence ratio"),
fitness_score: float = Query(1.0, description="Fitness score for the constraints"),
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, str]:
"""Computes the declarative constraints and stores it.
Expand All @@ -48,6 +50,7 @@ async def compute_declarative_constraints(
Defaults to Depends(get_celonis_connection).
min_support: The minimum support ratio for the constraints.
min_confidence: The minimum confidence ratio for the constraints.
fitness_score: The fitness score for the constraints.

Returns:
A dictionary containing the job ID of the scheduled task.
Expand All @@ -65,12 +68,13 @@ async def compute_declarative_constraints(
celonis,
min_support,
min_confidence,
fitness_score,
)

return {"job_id": job_id}


# **************** Retrieving Declarative Model Attributes ****************
# **************** Retrieving Declarative Model Attributes - PM4PY ****************


@router.get("/get_existance_violations/{job_id}")
Expand Down Expand Up @@ -363,3 +367,42 @@ def get_nonchainsuccession_violations(job_id: str, request: Request) -> ReturnGr
verify_correct_job_module(job_id, request, MODULE_NAME)

return request.app.state.jobs[job_id].result.get("nonchainsuccession", [])


# **************** Retrieving Declarative Model Attributes - PQL Queries ****************


@router.get("/get_always_after_pql/")
def get_always_after_pql(
request: Request,
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, Union[List[TableType], List[GraphType]]]:
"""Retrieves the always-after relations via PQL.

Args:
request: The FastAPI request object.
celonis: The CelonisManager dependency injection.

Returns:
A JSON object with "tables" and "graphs" keys.
"""
result_df = declarative_queries.get_always_after_relation(celonis)
return result_df


@router.get("/get_always_before_pql/")
def get_always_before_pql(
request: Request,
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, Union[List[TableType], List[GraphType]]]:
"""Retrieves the always-before relations via PQL.

Args:
request: The FastAPI request object.
celonis: The CelonisManager dependency injection.

Returns:
A JSON object with "tables" and "graphs" keys.
"""
result_df = declarative_queries.get_always_before_relation(celonis)
return result_df
3 changes: 3 additions & 0 deletions backend/api/tasks/declarative_constraints_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def compute_and_store_declarative_constraints(
celonis: CelonisConnectionManager,
min_support_ratio: float = 0.3,
min_confidence_ratio: float = 0.75,
fitness_score: float = 1.0,
) -> None:
"""Computes the declarative constraints and stores it in the app state.

Expand All @@ -24,6 +25,7 @@ def compute_and_store_declarative_constraints(
celonis: The CelonisConnectionManager instance.
min_support_ratio: The minimum support ratio for the constraints.
min_confidence_ratio: The minimum confidence ratio for the constraints.
fitness_score: The fitness score for the constraints.
"""
# Get the job record from the app state
rec: JobStatus = app.state.jobs[job_id]
Expand All @@ -42,6 +44,7 @@ def compute_and_store_declarative_constraints(
rec.result = dc.update_model_and_run_all_rules(
min_support_ratio=min_support_ratio,
min_confidence_ratio=min_confidence_ratio,
fitness_score=fitness_score,
)
rec.status = "complete"

Expand Down
23 changes: 22 additions & 1 deletion backend/conformance_checking/declarative_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
log: pd.DataFrame,
min_support_ratio: Optional[float] = 0.3,
min_confidence_ratio: Optional[float] = 0.75,
fitness_score: Optional[float] = 1.0,
case_id_col: Optional[str] = None,
activity_col: Optional[str] = None,
timestamp_col: Optional[str] = None,
Expand All @@ -48,13 +49,16 @@ def __init__(
Defaults to 0.3.
min_confidence_ratio: The minimum confidence ratio for discovering rules.
Defaults to 0.75.
fitness_score: The fitness score threshold for conformance checking.
Defaults to 1.0.
case_id_col : The name of the column containing case IDs.
activity_col : The name of the column containing activity names.
timestamp_col : The name of the column containing timestamps.
"""
self.log = log
self.min_support_ratio = min_support_ratio
self.min_confidence_ratio = min_confidence_ratio
self.fitness_score = fitness_score
self.declare_model: Optional[DeclareModelType] = None
self.case_id_col: Optional[str] = case_id_col
self.activity_col: Optional[str] = activity_col
Expand Down Expand Up @@ -91,6 +95,7 @@ def run_model(
log: Optional[pd.DataFrame] = None,
min_support_ratio: Optional[float] = None,
min_confidence_ratio: Optional[float] = None,
fitness_score: Optional[float] = None,
) -> None:
"""Runs the declarative model on the event log.

Expand All @@ -100,13 +105,16 @@ def run_model(
log: The event log to use.
min_support_ratio: The minimum support ratio for discovering rules.
min_confidence_ratio: The minimum confidence ratio for discovering rules.
fitness_score: The fitness score threshold for conformance checking.
"""
if log is None:
log = self.log
if min_support_ratio is None:
min_support_ratio = self.min_support_ratio
if min_confidence_ratio is None:
min_confidence_ratio = self.min_confidence_ratio
if fitness_score is not None:
self.fitness_score = fitness_score
self.declare_model = pm4py.discover_declare( # type: ignore
log,
min_support_ratio=min_support_ratio,
Expand All @@ -121,6 +129,7 @@ def rule_specific_violation_summary(
declare_model: Optional[DeclareModelType] = None,
log: Optional[pd.DataFrame] = None,
rule_name: Optional[str] = None,
fitness_score: Optional[float] = None,
verbose: bool = False,
) -> ReturnGraphType:
"""Summarizes number of violations for a declarative rule.
Expand All @@ -133,6 +142,7 @@ def rule_specific_violation_summary(
log: The event log. If None, uses the default log.
rule_name: Name of the rule to check.
verbose: Whether to print details for debugging.
fitness_score: The fitness score threshold for conformance checking.

Returns:
Summary with graph and table information of rule violations.
Expand All @@ -146,11 +156,15 @@ def rule_specific_violation_summary(
declare_model = self.declare_model
if log is None:
log = self.log
if fitness_score is not None:
self.fitness_score = fitness_score

if str(rule_name) not in self.valid_rules:
raise ValueError(
f"Unsupported rule: '{rule_name}'. Must be one of: {self.valid_rules}"
)
if self.fitness_score is None:
self.fitness_score = 1.0

if declare_model is None:
raise ValueError("Declare model is stil None. Something has gone wrong.")
Expand All @@ -171,7 +185,9 @@ def rule_specific_violation_summary(
else:
A, B = rule_key, None # type: ignore
diagnostics = decl_conf.apply(log, {rule_name: {(A, B): rule_info}}) # type: ignore
violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore
violated = [
d for d in diagnostics if d["dev_fitness"] < self.fitness_score
] # type: ignore
violation_count = len(violated) # type: ignore

if violation_count > 0:
Expand Down Expand Up @@ -410,6 +426,7 @@ def update_model_and_run_all_rules(
log: Optional[pd.DataFrame] = None,
min_support_ratio: Optional[float] = None,
min_confidence_ratio: Optional[float] = None,
fitness_score: Optional[float] = 1.0,
list_of_rules: Optional[List[str]] = None,
run_from_scratch: Optional[bool] = False,
) -> Any:
Expand All @@ -423,6 +440,7 @@ def update_model_and_run_all_rules(
valid rules.
run_from_scratch: If True, re-evaluates all rules even if results
stored.
fitness_score: The fitness score threshold for conformance checking.

Returns:
Dictionary of all violations.
Expand All @@ -435,11 +453,14 @@ def update_model_and_run_all_rules(
min_confidence_ratio = self.min_confidence_ratio
if list_of_rules is None:
list_of_rules = self.valid_rules
if fitness_score is not None:
self.fitness_score = fitness_score

self.run_model(
log=log,
min_support_ratio=min_support_ratio,
min_confidence_ratio=min_confidence_ratio,
fitness_score=fitness_score,
)
for rule in list_of_rules:
self.temp = self.get_declarative_conformance_diagnostics(
Expand Down
159 changes: 159 additions & 0 deletions backend/pql_queries/declarative_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Queries that can be used to get log-skeleton related data from celonis."""

from itertools import combinations
from typing import Dict, List, TypeAlias, Union, Any

from pandas import DataFrame
import pandas as pd

from backend.celonis_connection.celonis_connection_manager import (
CelonisConnectionManager,
)
from backend.pql_queries.general_queries import get_activities

# **************** Type Aliases ****************

TableType: TypeAlias = Dict[str, Any]
GraphType: TypeAlias = Dict[str, Any]
ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]]

# **************** Formatting Function ****************


def format_graph_and_table(curr_df: pd.DataFrame) -> ReturnGraphType:
"""Formats the DataFrame into a graph and table structure.

Args:
curr_df (pd.DataFrame): The DataFrame to format.

Returns:
ReturnGraphType: A dictionary containing the formatted graph and table.
"""
output: ReturnGraphType = {"graphs": [], "tables": []}

if not curr_df.empty:
if curr_df.shape[1] == 3:
nodes = []
edges = []
for i, row in curr_df.iterrows(): # type: ignore
nodes.append(str(row[curr_df.columns[0]])) # type: ignore
nodes.append(str(row[curr_df.columns[1]])) # type: ignore
edges.append( # type: ignore
{ # type: ignore
"from": str(row[curr_df.columns[0]]), # type: ignore
"to": str(row[curr_df.columns[1]]), # type: ignore
"label": str(row[curr_df.columns[2]]), # type: ignore
}
)

nodes = [{"id": str(ele)} for ele in list(set(list(nodes)))] # type: ignore
output["graphs"].append(
{
"nodes": nodes, # type: ignore
"edges": edges,
}
)

headers = list(curr_df.columns)
rows = curr_df.values.tolist() # type: ignore
output["tables"].append(
{
"headers": headers, # type: ignore
"rows": [[str(ele) for ele in row] for row in rows], # type: ignore
}
)
else:
headers = list(curr_df.columns)
rows = curr_df.values.tolist() # type: ignore
output["tables"].append(
{
"headers": headers, # type: ignore
"rows": [[str(ele) for ele in row] for row in rows], # type: ignore
}
)
return output


# **************** PQL Functions ****************


# Always before
def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraphType:
    """Compute Always-Before summary using PQL.

    For every unordered pair of activities the MATCH_PROCESS operator is
    queried in both directions; a pair is reported only when the relation
    holds in exactly one direction (some case exhibits it and no case
    exhibits the opposite).

    Args:
        celonis (CelonisConnectionManager): the celonis connection

    Returns:
        ReturnGraphType: A dictionary containing the formatted graph and table.
    """
    target_df: pd.DataFrame = DataFrame(
        columns=["Activity A", "Activity B", "# Occurrences"]
    )
    act_table = get_activities(celonis)  # type: ignore
    activity_pairs = list(combinations(act_table["Activity"].to_list(), 2))  # type: ignore
    for i, (act_a, act_b) in enumerate(activity_pairs):  # type: ignore
        query = {
            "A before B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_a}'] as src,
            NODE ['{act_b}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
            "B before A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_b}'] as src,
            NODE ['{act_a}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
        }
        pair_df = celonis.get_dataframe_from_celonis(query)  # type: ignore
        # Hoist the per-case masks so each comparison runs only once.
        a_before_b = pair_df["A before B"] == 1  # type: ignore
        b_before_a = pair_df["B before A"] == 1  # type: ignore
        if b_before_a.any() and not a_before_b.any():  # type: ignore
            target_df.loc[i] = [act_b, act_a, int(b_before_a.sum())]  # type: ignore
        elif a_before_b.any() and not b_before_a.any():  # type: ignore
            target_df.loc[i] = [act_a, act_b, int(a_before_b.sum())]  # type: ignore
    return format_graph_and_table(target_df)


# Always after
def get_always_after_relation(celonis: CelonisConnectionManager) -> ReturnGraphType:
    """Compute Always-After summary using PQL.

    For every unordered pair of activities the MATCH_PROCESS operator is
    queried in both directions; a pair is reported only when the relation
    holds in exactly one direction (some case exhibits it and no case
    exhibits the opposite).

    Args:
        celonis (CelonisConnectionManager): the celonis connection

    Returns:
        ReturnGraphType: A dictionary containing the formatted graph and table.
    """
    target_df: pd.DataFrame = DataFrame(
        columns=["Activity A", "Activity B", "# Occurrences"]
    )
    act_table = get_activities(celonis)
    activity_pairs = list(combinations(act_table["Activity"].to_list(), 2))  # type: ignore
    for i, (act_a, act_b) in enumerate(activity_pairs):  # type: ignore
        query = {
            "A after B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_b}'] as src,
            NODE ['{act_a}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
            "B after A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_a}'] as src,
            NODE ['{act_b}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
        }
        pair_df = celonis.get_dataframe_from_celonis(query)  # type: ignore
        # Hoist the per-case masks so each comparison runs only once.
        a_after_b = pair_df["A after B"] == 1  # type: ignore
        b_after_a = pair_df["B after A"] == 1  # type: ignore
        if b_after_a.any() and not a_after_b.any():  # type: ignore
            target_df.loc[i] = [act_b, act_a, int(b_after_a.sum())]  # type: ignore
        elif a_after_b.any() and not b_after_a.any():  # type: ignore
            target_df.loc[i] = [act_a, act_b, int(a_after_b.sum())]  # type: ignore
    return format_graph_and_table(target_df)
1 change: 0 additions & 1 deletion backend/pql_queries/declerative_queries.py

This file was deleted.

Loading