Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion backend/api/modules/declarative_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from backend.celonis_connection.celonis_connection_manager import (
CelonisConnectionManager,
)
from backend.pql_queries import declarative_queries

# **************** Type Aliases ****************

Expand All @@ -33,6 +34,7 @@ async def compute_declarative_constraints(
request: Request,
min_support: float = Query(0.3, description="Minimum support ratio"),
min_confidence: float = Query(0.75, description="Minimum confidence ratio"),
fitness_score: float = Query(1.0, description="Fitness score for the constraints"),
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, str]:
"""Computes the declarative constraints and stores it.
Expand All @@ -48,6 +50,7 @@ async def compute_declarative_constraints(
Defaults to Depends(get_celonis_connection).
min_support: The minimum support ratio for the constraints.
min_confidence: The minimum confidence ratio for the constraints.
fitness_score: The fitness score for the constraints.

Returns:
A dictionary containing the job ID of the scheduled task.
Expand All @@ -65,12 +68,13 @@ async def compute_declarative_constraints(
celonis,
min_support,
min_confidence,
fitness_score,
)

return {"job_id": job_id}


# **************** Retrieving Declarative Model Attributes ****************
# **************** Retrieving Declarative Model Attributes - PM4PY ****************


@router.get("/get_existance_violations/{job_id}")
Expand Down Expand Up @@ -363,3 +367,42 @@ def get_nonchainsuccession_violations(job_id: str, request: Request) -> ReturnGr
verify_correct_job_module(job_id, request, MODULE_NAME)

return request.app.state.jobs[job_id].result.get("nonchainsuccession", [])


# **************** Retrieving Declarative Model Attributes - PQL Queries ****************


@router.get("/get_always_after_pql/")
def get_always_after_pql(
request: Request,
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, Union[List[TableType], List[GraphType]]]:
"""Retrieves the always-after relations via PQL.

Args:
request: The FastAPI request object.
celonis: The CelonisManager dependency injection.

Returns:
A JSON object with "tables" and "graphs" keys.
"""
result_df = declarative_queries.get_always_after_relation(celonis)
return result_df


@router.get("/get_always_before_pql/")
def get_always_before_pql(
request: Request,
celonis: CelonisConnectionManager = Depends(get_celonis_connection),
) -> Dict[str, Union[List[TableType], List[GraphType]]]:
"""Retrieves the always-before relations via PQL.

Args:
request: The FastAPI request object.
celonis: The CelonisManager dependency injection.

Returns:
A JSON object with "tables" and "graphs" keys.
"""
result_df = declarative_queries.get_always_before_relation(celonis)
return result_df
3 changes: 3 additions & 0 deletions backend/api/tasks/declarative_constraints_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def compute_and_store_declarative_constraints(
celonis: CelonisConnectionManager,
min_support_ratio: float = 0.3,
min_confidence_ratio: float = 0.75,
fitness_score: float = 1.0,
) -> None:
"""Computes the declarative constraints and stores it in the app state.

Expand All @@ -24,6 +25,7 @@ def compute_and_store_declarative_constraints(
celonis: The CelonisConnectionManager instance.
min_support_ratio: The minimum support ratio for the constraints.
min_confidence_ratio: The minimum confidence ratio for the constraints.
fitness_score: The fitness score for the constraints.
"""
# Get the job record from the app state
rec: JobStatus = app.state.jobs[job_id]
Expand All @@ -42,6 +44,7 @@ def compute_and_store_declarative_constraints(
rec.result = dc.update_model_and_run_all_rules(
min_support_ratio=min_support_ratio,
min_confidence_ratio=min_confidence_ratio,
fitness_score=fitness_score,
)
rec.status = "complete"

Expand Down
23 changes: 22 additions & 1 deletion backend/conformance_checking/declarative_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
log: pd.DataFrame,
min_support_ratio: Optional[float] = 0.3,
min_confidence_ratio: Optional[float] = 0.75,
fitness_score: Optional[float] = 1.0,
case_id_col: Optional[str] = None,
activity_col: Optional[str] = None,
timestamp_col: Optional[str] = None,
Expand All @@ -48,13 +49,16 @@ def __init__(
Defaults to 0.3.
min_confidence_ratio: The minimum confidence ratio for discovering rules.
Defaults to 0.75.
fitness_score: The fitness score threshold for conformance checking.
Defaults to 1.0.
case_id_col : The name of the column containing case IDs.
activity_col : The name of the column containing activity names.
timestamp_col : The name of the column containing timestamps.
"""
self.log = log
self.min_support_ratio = min_support_ratio
self.min_confidence_ratio = min_confidence_ratio
self.fitness_score = fitness_score
self.declare_model: Optional[DeclareModelType] = None
self.case_id_col: Optional[str] = case_id_col
self.activity_col: Optional[str] = activity_col
Expand Down Expand Up @@ -91,6 +95,7 @@ def run_model(
log: Optional[pd.DataFrame] = None,
min_support_ratio: Optional[float] = None,
min_confidence_ratio: Optional[float] = None,
fitness_score: Optional[float] = None,
) -> None:
"""Runs the declarative model on the event log.

Expand All @@ -100,13 +105,16 @@ def run_model(
log: The event log to use.
min_support_ratio: The minimum support ratio for discovering rules.
min_confidence_ratio: The minimum confidence ratio for discovering rules.
fitness_score: The fitness score threshold for conformance checking.
"""
if log is None:
log = self.log
if min_support_ratio is None:
min_support_ratio = self.min_support_ratio
if min_confidence_ratio is None:
min_confidence_ratio = self.min_confidence_ratio
if fitness_score is not None:
self.fitness_score = fitness_score
self.declare_model = pm4py.discover_declare( # type: ignore
log,
min_support_ratio=min_support_ratio,
Expand All @@ -121,6 +129,7 @@ def rule_specific_violation_summary(
declare_model: Optional[DeclareModelType] = None,
log: Optional[pd.DataFrame] = None,
rule_name: Optional[str] = None,
fitness_score: Optional[float] = None,
verbose: bool = False,
) -> ReturnGraphType:
"""Summarizes number of violations for a declarative rule.
Expand All @@ -133,6 +142,7 @@ def rule_specific_violation_summary(
log: The event log. If None, uses the default log.
rule_name: Name of the rule to check.
verbose: Whether to print details for debugging.
fitness_score: The fitness score threshold for conformance checking.

Returns:
Summary with graph and table information of rule violations.
Expand All @@ -146,11 +156,15 @@ def rule_specific_violation_summary(
declare_model = self.declare_model
if log is None:
log = self.log
if fitness_score is not None:
self.fitness_score = fitness_score

if str(rule_name) not in self.valid_rules:
raise ValueError(
f"Unsupported rule: '{rule_name}'. Must be one of: {self.valid_rules}"
)
if self.fitness_score is None:
self.fitness_score = 1.0

if declare_model is None:
raise ValueError("Declare model is stil None. Something has gone wrong.")
Expand All @@ -171,7 +185,9 @@ def rule_specific_violation_summary(
else:
A, B = rule_key, None # type: ignore
diagnostics = decl_conf.apply(log, {rule_name: {(A, B): rule_info}}) # type: ignore
violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore
violated = [
d for d in diagnostics if d["dev_fitness"] < self.fitness_score
] # type: ignore
violation_count = len(violated) # type: ignore

if violation_count > 0:
Expand Down Expand Up @@ -410,6 +426,7 @@ def update_model_and_run_all_rules(
log: Optional[pd.DataFrame] = None,
min_support_ratio: Optional[float] = None,
min_confidence_ratio: Optional[float] = None,
fitness_score: Optional[float] = 1.0,
list_of_rules: Optional[List[str]] = None,
run_from_scratch: Optional[bool] = False,
) -> Any:
Expand All @@ -423,6 +440,7 @@ def update_model_and_run_all_rules(
valid rules.
run_from_scratch: If True, re-evaluates all rules even if results
stored.
fitness_score: The fitness score threshold for conformance checking.

Returns:
Dictionary of all violations.
Expand All @@ -435,11 +453,14 @@ def update_model_and_run_all_rules(
min_confidence_ratio = self.min_confidence_ratio
if list_of_rules is None:
list_of_rules = self.valid_rules
if fitness_score is not None:
self.fitness_score = fitness_score

self.run_model(
log=log,
min_support_ratio=min_support_ratio,
min_confidence_ratio=min_confidence_ratio,
fitness_score=fitness_score,
)
for rule in list_of_rules:
self.temp = self.get_declarative_conformance_diagnostics(
Expand Down
159 changes: 159 additions & 0 deletions backend/pql_queries/declarative_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Queries that can be used to get log-skeleton related data from celonis."""

from itertools import combinations
from typing import Dict, List, TypeAlias, Union, Any

from pandas import DataFrame
import pandas as pd

from backend.celonis_connection.celonis_connection_manager import (
CelonisConnectionManager,
)
from backend.pql_queries.general_queries import get_activities

# **************** Type Aliases ****************

TableType: TypeAlias = Dict[str, Any]
GraphType: TypeAlias = Dict[str, Any]
ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]]

# **************** Formatting Function ****************


def format_graph_and_table(curr_df: pd.DataFrame) -> ReturnGraphType:
"""Formats the DataFrame into a graph and table structure.

Args:
curr_df (pd.DataFrame): The DataFrame to format.

Returns:
ReturnGraphType: A dictionary containing the formatted graph and table.
"""
output: ReturnGraphType = {"graphs": [], "tables": []}

if not curr_df.empty:
if curr_df.shape[1] == 3:
nodes = []
edges = []
for i, row in curr_df.iterrows(): # type: ignore
nodes.append(str(row[curr_df.columns[0]])) # type: ignore
nodes.append(str(row[curr_df.columns[1]])) # type: ignore
edges.append( # type: ignore
{ # type: ignore
"from": str(row[curr_df.columns[0]]), # type: ignore
"to": str(row[curr_df.columns[1]]), # type: ignore
"label": str(row[curr_df.columns[2]]), # type: ignore
}
)

nodes = [{"id": str(ele)} for ele in list(set(list(nodes)))] # type: ignore
output["graphs"].append(
{
"nodes": nodes, # type: ignore
"edges": edges,
}
)

headers = list(curr_df.columns)
rows = curr_df.values.tolist() # type: ignore
output["tables"].append(
{
"headers": headers, # type: ignore
"rows": [[str(ele) for ele in row] for row in rows], # type: ignore
}
)
else:
headers = list(curr_df.columns)
rows = curr_df.values.tolist() # type: ignore
output["tables"].append(
{
"headers": headers, # type: ignore
"rows": [[str(ele) for ele in row] for row in rows], # type: ignore
}
)
return output


# **************** PQL Functions ****************


# Always before
def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraphType:
    """Compute Always-Before summary using PQL.

    For every unordered pair of activities the MATCH_PROCESS operator is
    queried in both directions; a pair is reported only when the relation
    holds in exactly one direction (some case exhibits it and no case
    exhibits the opposite).

    Args:
        celonis (CelonisConnectionManager): the celonis connection

    Returns:
        ReturnGraphType: A dictionary containing the formatted graph and table.
    """
    target_df: pd.DataFrame = DataFrame(
        columns=["Activity A", "Activity B", "# Occurrences"]
    )
    act_table = get_activities(celonis)  # type: ignore
    activity_pairs = list(combinations(act_table["Activity"].to_list(), 2))  # type: ignore
    for i, (act_a, act_b) in enumerate(activity_pairs):  # type: ignore
        query = {
            "A before B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_a}'] as src,
            NODE ['{act_b}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
            "B before A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_b}'] as src,
            NODE ['{act_a}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
        }
        pair_df = celonis.get_dataframe_from_celonis(query)  # type: ignore
        # Hoist the per-case masks so each comparison runs only once.
        a_before_b = pair_df["A before B"] == 1  # type: ignore
        b_before_a = pair_df["B before A"] == 1  # type: ignore
        if b_before_a.any() and not a_before_b.any():  # type: ignore
            target_df.loc[i] = [act_b, act_a, int(b_before_a.sum())]  # type: ignore
        elif a_before_b.any() and not b_before_a.any():  # type: ignore
            target_df.loc[i] = [act_a, act_b, int(a_before_b.sum())]  # type: ignore
    return format_graph_and_table(target_df)


# Always after
def get_always_after_relation(celonis: CelonisConnectionManager) -> ReturnGraphType:
    """Compute Always-After summary using PQL.

    For every unordered pair of activities the MATCH_PROCESS operator is
    queried in both directions; a pair is reported only when the relation
    holds in exactly one direction (some case exhibits it and no case
    exhibits the opposite).

    Args:
        celonis (CelonisConnectionManager): the celonis connection

    Returns:
        ReturnGraphType: A dictionary containing the formatted graph and table.
    """
    target_df: pd.DataFrame = DataFrame(
        columns=["Activity A", "Activity B", "# Occurrences"]
    )
    act_table = get_activities(celonis)
    activity_pairs = list(combinations(act_table["Activity"].to_list(), 2))  # type: ignore
    for i, (act_a, act_b) in enumerate(activity_pairs):  # type: ignore
        query = {
            "A after B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_b}'] as src,
            NODE ['{act_a}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
            "B after A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{act_a}'] as src,
            NODE ['{act_b}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
        }
        pair_df = celonis.get_dataframe_from_celonis(query)  # type: ignore
        # Hoist the per-case masks so each comparison runs only once.
        a_after_b = pair_df["A after B"] == 1  # type: ignore
        b_after_a = pair_df["B after A"] == 1  # type: ignore
        if b_after_a.any() and not a_after_b.any():  # type: ignore
            target_df.loc[i] = [act_b, act_a, int(b_after_a.sum())]  # type: ignore
        elif a_after_b.any() and not b_after_a.any():  # type: ignore
            target_df.loc[i] = [act_a, act_b, int(a_after_b.sum())]  # type: ignore
    return format_graph_and_table(target_df)
1 change: 0 additions & 1 deletion backend/pql_queries/declerative_queries.py

This file was deleted.

Loading