From a3c5937fa62976aec01f89360b257090ffd2ab0e Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Thu, 12 Jun 2025 03:26:20 +0200 Subject: [PATCH 01/18] Added user input for support & confidence in model --- .../declarative_constraints.py | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/backend/conformance_checking/declarative_constraints.py b/backend/conformance_checking/declarative_constraints.py index 7f26334..a793b07 100644 --- a/backend/conformance_checking/declarative_constraints.py +++ b/backend/conformance_checking/declarative_constraints.py @@ -104,7 +104,7 @@ def run_model( min_support_ratio = self.min_support_ratio if min_confidence_ratio is None: min_confidence_ratio = self.min_confidence_ratio - self.declare_model = pm4py.discover_declare( + self.declare_model = pm4py.discover_declare( # type: ignore log, min_support_ratio=min_support_ratio, min_confidence_ratio=min_confidence_ratio, @@ -168,8 +168,8 @@ def rule_specific_violation_summary( else: A, B = rule_key, None # type: ignore diagnostics = decl_conf.apply(log, {rule_name: {(A, B): rule_info}}) # type: ignore - violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] - violation_count = len(violated) + violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore + violation_count = len(violated) # type: ignore if B != []: table_headers = [ @@ -400,3 +400,45 @@ def run_all_rules( rule_name=rule, run_from_scratch=run_from_scratch ) return self.conf_results_memory + + def update_model_and_run_all_rules( + self, + log: Optional[pd.DataFrame] = None, + min_support_ratio: Optional[float] = None, + min_confidence_ratio: Optional[float] = None, + list_of_rules: Optional[List[str]] = None, + run_from_scratch: Optional[bool] = False, + ) -> Any: + """Updates the model and runs all rules. + + Args: + log: The event log to use. + min_support_ratio: The minimum support ratio for discovering rules. + min_confidence_ratio: The minimum confidence ratio for discovering rules. + list_of_rules: List of rule names to check. If None, runs for all + valid rules. + run_from_scratch: If True, re-evaluates all rules even if results + stored. + + Returns: + Dictionary of all violations. 
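+
+        Example (hypothetical usage; ``log_df`` stands for any event-log
+        DataFrame accepted by pm4py):
+
+            >>> dc = DeclarativeConstraints(log_df)
+            >>> results = dc.update_model_and_run_all_rules(
+            ...     min_support_ratio=0.3, min_confidence_ratio=0.75
+            ... )
+            >>> set(results) == set(dc.valid_rules)
+            True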
+ """ + if log is None: + log = self.log + if min_support_ratio is None: + min_support_ratio = self.min_support_ratio + if min_confidence_ratio is None: + min_confidence_ratio = self.min_confidence_ratio + if list_of_rules is None: + list_of_rules = self.valid_rules + + self.run_model( + log=log, + min_support_ratio=min_support_ratio, + min_confidence_ratio=min_confidence_ratio, + ) + for rule in list_of_rules: + self.temp = self.get_declarative_conformance_diagnostics( + rule_name=rule, run_from_scratch=run_from_scratch + ) + return self.conf_results_memory From fb727c0871ab77680a55d452a3919fe9b18cd2b7 Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Thu, 12 Jun 2025 03:27:02 +0200 Subject: [PATCH 02/18] Added user input for support & confidence in model --- backend/api/modules/declarative_router.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index b050589..c670a57 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -3,7 +3,7 @@ import uuid from typing import Dict, List, TypeAlias, Union -from fastapi import APIRouter, BackgroundTasks, Depends, Request +from fastapi import APIRouter, BackgroundTasks, Depends, Request, Query from backend.api.celonis import get_celonis_connection from backend.api.jobs import verify_correct_job_module @@ -27,10 +27,12 @@ MODULE_NAME = "declarative_constraints" -@router.post("/compute-constraints", status_code=202) +@router.get("/compute-constraints", status_code=202) async def compute_declarative_constraints( background_tasks: BackgroundTasks, request: Request, + min_support: float = Query(..., description="Minimum support ratio"), + min_confidence: float = Query(..., description="Minimum confidence ratio"), celonis: CelonisConnectionManager = Depends(get_celonis_connection), ) -> Dict[str, str]: """Computes the declarative constraints and stores it. @@ -44,6 +46,8 @@ async def compute_declarative_constraints( application state via `request.app.state`. celonis (optional): The CelonisManager dependency injection. Defaults to Depends(get_celonis_connection). + min_support: The minimum support ratio for the constraints. + min_confidence: The minimum confidence ratio for the constraints. Returns: A dictionary containing the job ID of the scheduled task. 
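
    For illustration, a minimal client-side sketch of calling this endpoint
    (the localhost URL and the use of the ``requests`` library are
    assumptions, not part of the patch):

        import requests

        # Schedules the background computation and returns a job ID.
        resp = requests.get(
            "http://localhost:8000/api/declarative-constraints/compute-constraints",
            params={"min_support": 0.3, "min_confidence": 0.75},
        )
        job_id = resp.json()["job_id"]
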
@@ -55,7 +59,12 @@ async def compute_declarative_constraints( # Schedule the worker background_tasks.add_task( - compute_and_store_declarative_constraints, request.app, job_id, celonis + compute_and_store_declarative_constraints, + request.app, + job_id, + celonis, + min_support, + min_confidence, ) return {"job_id": job_id} From c67e18a866af370d56d6d326c69c13bc9c8fb963 Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Thu, 12 Jun 2025 03:27:34 +0200 Subject: [PATCH 03/18] Added user input for support & confidence in model --- backend/api/tasks/declarative_constraints_tasks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/api/tasks/declarative_constraints_tasks.py b/backend/api/tasks/declarative_constraints_tasks.py index 351b05c..f2072a2 100644 --- a/backend/api/tasks/declarative_constraints_tasks.py +++ b/backend/api/tasks/declarative_constraints_tasks.py @@ -39,11 +39,10 @@ def compute_and_store_declarative_constraints( # Compute the declarative constraints dc = DeclarativeConstraints(df) - dc.run_model( + rec.result = dc.update_model_and_run_all_rules( min_support_ratio=min_support_ratio, min_confidence_ratio=min_confidence_ratio, ) - rec.result = dc.run_all_rules() rec.status = "complete" except Exception as e: From 8a082ce70076bfb432d4741ba13393a0d641e2a0 Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Sat, 14 Jun 2025 00:35:14 +0200 Subject: [PATCH 04/18] Fixed "Internal Server Error" issues --- .../declarative_constraints.py | 78 ++++++++++--------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/backend/conformance_checking/declarative_constraints.py b/backend/conformance_checking/declarative_constraints.py index a793b07..2362908 100644 --- a/backend/conformance_checking/declarative_constraints.py +++ b/backend/conformance_checking/declarative_constraints.py @@ -7,16 +7,17 @@ from typing import Any, Dict, List, Optional, TypeAlias, Union -import pandas as pd +import pandas as pd # type: ignore import pm4py # type: ignore from pm4py.algo.conformance.declare import algorithm as decl_conf # type: ignore # **************** Type Aliases **************** DeclareModelType: TypeAlias = Dict[str, Dict[Any, Dict[str, int]]] -ReturnGraphType: TypeAlias = Dict[ - str, List[Dict[str, List[Union[str, Dict[str, str]]]]] -] + +TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]] +GraphType: TypeAlias = Dict[str, List[Dict[str, str]]] +ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]] class DeclarativeConstraints: @@ -58,25 +59,25 @@ def __init__( self.case_id_col: Optional[str] = case_id_col self.activity_col: Optional[str] = activity_col self.timestamp_col: Optional[str] = timestamp_col - self.valid_rules = [ - "existence", - "absence", - "exactly_one", - "init", - "responded_existence", - "coexistence", - "response", - "precedence", - "succession", - "altprecedence", - "altsuccession", - "chainresponse", - "chainprecedence", - "chainsuccession", - "noncoexistence", - "nonsuccession", - "nonchainsuccession", - ] + self.valid_rules = list({ + "Existence" : "existance", + "Never" : "absence", + "Exactly Once" : "exactly_one", + "Initially" : "init", + "Responded Existence" : "responded_existence", + "Co-Existence" : "coexistence", + "Always After" : "response", + "Always Before" : "precedence", + "Succession" : "succession", + "Alternate Precedence" : "altprecedence", + "Alternate Succession" : "altsuccession", + "Immediately After" : "chainresponse", + "Immediately Before" : "chainprecedence", + "Chain Succession" 
: "chainsuccession", + "Non Co-Existence" : "noncoexistence", + "Not Succession" : "nonsuccession", + "Not Chain Succession" : "nonchainsuccession", + }.values()) self.conf_results_memory: Dict[str, None] = { rule: None for rule in self.valid_rules } @@ -171,21 +172,22 @@ def rule_specific_violation_summary( violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore violation_count = len(violated) # type: ignore - if B != []: - table_headers = [ - "First Activity", - "Second Activity", - "# Violations", - ] - graph_nodes.append(A) # type: ignore - graph_nodes.append(B) # type: ignore - graph_edges.append( - {"from": A, "to": B, "label": str(violation_count)} # type: ignore - ) - table_rows.append([A, B, str(violation_count)]) # type: ignore - else: - table_headers = ["Activity", "# Violations"] - table_rows.append([A, str(violation_count)]) # type: ignore + if violation_count > 0: + if rule_name not in ["existence", "absence", "init", "exactly_one"]: + table_headers = [ + "First Activity", + "Second Activity", + "# Violations", + ] + graph_nodes.append(A) # type: ignore + graph_nodes.append(B) # type: ignore + graph_edges.append( + {"from": A, "to": B, "label": str(violation_count)} # type: ignore + ) + table_rows.append([A, B, str(violation_count)]) # type: ignore + else: + table_headers = ["Activity", "# Violations"] + table_rows.append([A, str(violation_count)]) # type: ignore graph_nodes = [{"id": node} for node in list(set(list(graph_nodes)))] # type: ignore if table_headers != []: From c737c41b014ee28b4cc2542c44d0478908c0213c Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Sat, 14 Jun 2025 00:36:19 +0200 Subject: [PATCH 05/18] Updated TypeAlias --- backend/api/modules/declarative_router.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index c670a57..2092865 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -17,9 +17,9 @@ # **************** Type Aliases **************** -ReturnGraphType: TypeAlias = Dict[ - str, List[Dict[str, List[Union[str, Dict[str, str]]]]] -] +TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]] +GraphType: TypeAlias = Dict[str, List[Dict[str, str]]] +ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]] router = APIRouter( prefix="/api/declarative-constraints", tags=["Declarative Constraints CC"] @@ -31,8 +31,8 @@ async def compute_declarative_constraints( background_tasks: BackgroundTasks, request: Request, - min_support: float = Query(..., description="Minimum support ratio"), - min_confidence: float = Query(..., description="Minimum confidence ratio"), + min_support: float = Query(0.3, description="Minimum support ratio"), + min_confidence: float = Query(0.75, description="Minimum confidence ratio"), celonis: CelonisConnectionManager = Depends(get_celonis_connection), ) -> Dict[str, str]: """Computes the declarative constraints and stores it. 
From b77dcdc8fef014d18e8542433d8ea07ff1d2604d Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal <62815092+ekansh18@users.noreply.github.com> Date: Sun, 15 Jun 2025 01:40:54 +0200 Subject: [PATCH 06/18] Update declarative_constraints.py --- .../declarative_constraints.py | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/backend/conformance_checking/declarative_constraints.py b/backend/conformance_checking/declarative_constraints.py index 2362908..7f8be4a 100644 --- a/backend/conformance_checking/declarative_constraints.py +++ b/backend/conformance_checking/declarative_constraints.py @@ -59,25 +59,27 @@ def __init__( self.case_id_col: Optional[str] = case_id_col self.activity_col: Optional[str] = activity_col self.timestamp_col: Optional[str] = timestamp_col - self.valid_rules = list({ - "Existence" : "existance", - "Never" : "absence", - "Exactly Once" : "exactly_one", - "Initially" : "init", - "Responded Existence" : "responded_existence", - "Co-Existence" : "coexistence", - "Always After" : "response", - "Always Before" : "precedence", - "Succession" : "succession", - "Alternate Precedence" : "altprecedence", - "Alternate Succession" : "altsuccession", - "Immediately After" : "chainresponse", - "Immediately Before" : "chainprecedence", - "Chain Succession" : "chainsuccession", - "Non Co-Existence" : "noncoexistence", - "Not Succession" : "nonsuccession", - "Not Chain Succession" : "nonchainsuccession", - }.values()) + self.valid_rules = list( + { + "Existence": "existance", + "Never": "absence", + "Exactly Once": "exactly_one", + "Initially": "init", + "Responded Existence": "responded_existence", + "Co-Existence": "coexistence", + "Always After": "response", + "Always Before": "precedence", + "Succession": "succession", + "Alternate Precedence": "altprecedence", + "Alternate Succession": "altsuccession", + "Immediately After": "chainresponse", + "Immediately Before": "chainprecedence", + "Chain Succession": "chainsuccession", + "Non Co-Existence": "noncoexistence", + "Not Succession": "nonsuccession", + "Not Chain Succession": "nonchainsuccession", + }.values() + ) self.conf_results_memory: Dict[str, None] = { rule: None for rule in self.valid_rules } From f399656ddcc825fa86178259bff3ebe92f4e8064 Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 02:23:41 +0200 Subject: [PATCH 07/18] Added arrowheads in graphs where necessary --- frontend/src/ArrowGraph.js | 225 ++++++++++++++++++++++++++++++++++++ frontend/src/ResultsPage.js | 32 +++-- 2 files changed, 249 insertions(+), 8 deletions(-) create mode 100644 frontend/src/ArrowGraph.js diff --git a/frontend/src/ArrowGraph.js b/frontend/src/ArrowGraph.js new file mode 100644 index 0000000..4b18c41 --- /dev/null +++ b/frontend/src/ArrowGraph.js @@ -0,0 +1,225 @@ +import React, { useEffect, useRef } from "react"; +import * as d3 from "d3"; + +const ArrowGraph = ({ graphData }) => { + const svgRef = useRef(); + + useEffect(() => { + const width = 900; + const height = 600; + const padding = 50; + const nodeRadius = 38; + + const svg = d3.select(svgRef.current); + svg.selectAll("*").remove(); + + const zoomGroup = svg.append("g"); + + svg.call( + d3.zoom().on("zoom", (event) => { + zoomGroup.attr("transform", event.transform); + }) + ); + + const { nodes = [], edges = [] } = graphData; + + const colorScale = d3.scaleOrdinal(d3.schemeCategory10); + const nodeColorMap = {}; + nodes.forEach((n, i) => { + nodeColorMap[n.id] = colorScale(i); + }); + + const nodeById = new Map(nodes.map((n) => [n.id, 
n])); + + const edgeMap = new Map(); + edges.forEach((e) => { + const key = `${e.from}|||${e.to}`; + const label = String(e.label); + if (edgeMap.has(key)) { + edgeMap.get(key).labels.push(label); + } else { + edgeMap.set(key, { + from: e.from, + to: e.to, + labels: [label], + }); + } + }); + + const d3Edges = Array.from(edgeMap.values()) + .map((e) => ({ + source: nodeById.get(e.from), + target: nodeById.get(e.to), + label: e.labels.join(", "), + weight: e.labels.length, + color: nodeColorMap[e.from] || "#ccc", + })) + .filter((e) => e.source && e.target); + + // Marker for arrows — refX must match the shortened line offset + svg + .append("defs") + .append("marker") + .attr("id", "arrowhead") + .attr("viewBox", "0 -5 10 10") + .attr("refX", 10) // arrowhead offset relative to the shortened line + .attr("refY", 0) + .attr("markerWidth", 6) + .attr("markerHeight", 6) + .attr("orient", "auto") + .append("path") + .attr("d", "M0,-5L10,0L0,5") + .attr("fill", "#555"); + + // Initialize node positions + nodes.forEach((node) => { + node.x = Math.random() * (width - 2 * padding) + padding; + node.y = Math.random() * (height - 2 * padding) + padding; + }); + + const simulation = d3 + .forceSimulation(nodes) + .force( + "link", + d3 + .forceLink(d3Edges) + .id((d) => d.id) + .distance(220) + ) + .force("charge", d3.forceManyBody().strength(-600)) + .force("center", d3.forceCenter(width / 2, height / 2)); + + const link = zoomGroup + .append("g") + .attr("stroke-opacity", 0.6) + .selectAll("line") + .data(d3Edges) + .enter() + .append("line") + .attr("stroke-width", (d) => Math.max(1.5, Math.min(8, d.weight))) + .attr("stroke", (d) => d.color) + .attr("marker-end", "url(#arrowhead)"); + + const edgeLabel = zoomGroup + .append("g") + .selectAll("text") + .data(d3Edges) + .enter() + .append("text") + .attr("font-size", 10) + .attr("fill", "#333") + .attr("text-anchor", "middle") + .append("tspan") + .text((d) => d.label); + + const nodeGroup = zoomGroup + .append("g") + .selectAll("g") + .data(nodes) + .enter() + .append("g") + .call( + d3.drag().on("start", dragStart).on("drag", dragged).on("end", dragEnd) + ); + + nodeGroup + .append("circle") + .attr("r", nodeRadius) + .attr("fill", (d) => nodeColorMap[d.id]) + .attr("stroke", "#333") + .attr("stroke-width", 1.5); + + nodeGroup.append("title").text((d) => d.id); + + nodeGroup + .append("text") + .attr("text-anchor", "middle") + .attr("dy", 0) + .attr("font-size", 10) + .attr("fill", "#fff") + .style("pointer-events", "none") + .selectAll("tspan") + .data((d) => + d.id.length > 12 + ? [d.id.slice(0, d.id.length / 2), d.id.slice(d.id.length / 2)] + : [d.id] + ) + .enter() + .append("tspan") + .attr("x", 0) + .attr("dy", (d, i) => (i === 0 ? 
0 : 12)) + .text((d) => d); + + simulation.on("tick", () => { + nodeGroup.attr("transform", (d) => { + d.x = Math.max(padding, Math.min(width - padding, d.x)); + d.y = Math.max(padding, Math.min(height - padding, d.y)); + return `translate(${d.x},${d.y})`; + }); + + link + .attr("x1", (d) => d.source.x) + .attr("y1", (d) => d.source.y) + .attr("x2", (d) => { + const dx = d.target.x - d.source.x; + const dy = d.target.y - d.source.y; + const dist = Math.sqrt(dx * dx + dy * dy); + return d.target.x - (dx * (nodeRadius + 5)) / dist; + }) + .attr("y2", (d) => { + const dx = d.target.x - d.source.x; + const dy = d.target.y - d.source.y; + const dist = Math.sqrt(dx * dx + dy * dy); + return d.target.y - (dy * (nodeRadius + 5)) / dist; + }); + + // Track label counts between same source-target pairs + const labelOffsets = new Map(); + edgeLabel + .attr("x", (d) => (d.source.x + d.target.x) / 2) + .attr("y", function (d) { + const key = `${d.source.id}|||${d.target.id}`; + const count = labelOffsets.get(key) || 0; + labelOffsets.set(key, count + 1); + + // Alternate label positions around the line + const baseY = (d.source.y + d.target.y) / 2; + const offset = (count - 1) * 12; + return baseY + (count % 2 === 0 ? offset : -offset); + }); + }); + + setTimeout(() => simulation.stop(), 3000); + + function dragStart(event, d) { + if (!event.active) simulation.alphaTarget(0.3).restart(); + d.fx = d.x; + d.fy = d.y; + } + + function dragged(event, d) { + d.fx = event.x; + d.fy = event.y; + } + + function dragEnd(event, d) { + if (!event.active) simulation.alphaTarget(0); + } + }, [graphData]); + + return ( + + ); +}; + +export default ArrowGraph; diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index 0adb567..b13b5d3 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -16,6 +16,7 @@ import { import Graph from "./Graph"; import Table from "./Table"; +import ArrowGraph from "./ArrowGraph"; import { GET_GENERAL_INSIGHTS, @@ -509,7 +510,7 @@ const ResultsPage = () => { const handleSNAOrOrgOption = async (selected) => { const endpointMap = { "Handover of Work": HANDOVER_OF_WORK, - Subcontracting: SUBCONTRACTING, + "Subcontracting": SUBCONTRACTING, "Working together": WORKING_TOGETHER, "Similar Activities": SIMILAR_ACTIVITIES, "Role Discovery": ROLE_DISCOVERY, @@ -619,7 +620,7 @@ const ResultsPage = () => { "Fraction-Case Completions (using PQL)": FRACTION_CASE_COMPLETIONS_PQL, "Average workload": AVERAGE_WORKLOAD, "Average workload (using PQL)": AVERAGE_WORKLOAD_PQL, - Multitasking: MULTITASKING, + "Multitasking": MULTITASKING, "Average Activity Duration": AVERAGE_ACTIVITY_DURATION, "Average case duration": AVERAGE_CASE_DURATION, "Interaction Two Resources": INTERACTION_TWO_RESOURCES, @@ -652,18 +653,32 @@ const ResultsPage = () => { const renderGraphAndTable = () => ( <> - {graphData.map((graph, idx) => ( - - Graph {idx + 1} - - - ))} + {graphData.map((graph, idx) => { + const useArrowGraph = [ + "get_always_before_pql", + "get_always_after_pql", + "get_directly_follows_and_count", + ].includes(selectedOption); + + return ( + + Graph {idx + 1} + {useArrowGraph ? ( + + ) : ( + + )} + + ); + })} + {tableData.map((table, idx) => ( Table {idx + 1} ))} + {floatResult !== null && ( Result @@ -677,6 +692,7 @@ const ResultsPage = () => { )} + {!graphData.length && !tableData.length && floatResult === null && ( No data to display. 
From 0a1ca3fb163609742c05b38e1090f25340403e5a Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 17:26:20 +0200 Subject: [PATCH 08/18] Implemented info buttons for all insights --- frontend/src/ResultsPage.js | 161 ++++++++++++++++++++++++++++++++++-- 1 file changed, 152 insertions(+), 9 deletions(-) diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index b13b5d3..59ec500 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -17,6 +17,9 @@ import { import Graph from "./Graph"; import Table from "./Table"; import ArrowGraph from "./ArrowGraph"; +import Tooltip from "@mui/material/Tooltip"; +import IconButton from "@mui/material/IconButton"; +import InfoIcon from "@mui/icons-material/Info"; import { GET_GENERAL_INSIGHTS, @@ -142,6 +145,23 @@ const LOG_SKELETON_OPTIONS = [ }, ]; +// -------------------- Log Skeleton Descriptions -------------------- +const logSkeletonDescriptions = { + "Get Equivalence": "Checks which activities always occur together in cases.", + "Get Equivalence (PQL)": + "PQL-based variant for identifying equivalent activities.", + "Always Before": "Activity A always occurs before Activity B.", + "Always Before (PQL)": "PQL-based rule for 'always before' relationships.", + "Always After": "Activity A always occurs after Activity B.", + "Always After (PQL)": "PQL-based rule for 'always after' relationships.", + "Never Together": "Detects mutually exclusive activity pairs.", + "Never Together (PQL)": "PQL variant for mutual exclusivity.", + "Directly Follows": "Identifies direct succession between activities.", + "Activity Frequencies": "Counts how frequently each activity occurs.", + "Directly Follows and Count (PQL)": + "Shows direct follow relationships with frequency (PQL).", +}; + // --------------------Declarative Constraints Options -------------------- const DECLARATIVE_OPTIONS = [ { label: "Existence", endpoint: GET_EXISTANCE_VIOLATIONS }, @@ -187,6 +207,39 @@ const DECLARATIVE_OPTIONS = [ }, ]; +// -------------------- Declarative Constraints Descriptions -------------------- +const declarativeDescriptions = { + Existence: + "Ensures that a particular activity occurs at least once in a trace.", + Never: "Specifies that a particular activity must not occur in a trace.", + "Exactly Once": "Restricts an activity to occur exactly one time per trace.", + Initially: "Requires that a specific activity is the first in every trace.", + "Responded Existence": + "If Activity A occurs, then Activity B must also occur somewhere in the trace.", + "Co-Existence": + "Activities A and B must either both occur or both be absent in a trace.", + "Always After": + "If Activity A occurs, Activity B must follow it at some point.", + "Always Before": + "If Activity B occurs, Activity A must have occurred before it.", + Succession: + "If Activity A occurs, then Activity B must occur afterwards, and vice versa.", + "Alternate Precedence": + "Every occurrence of Activity B must be preceded by exactly one occurrence of Activity A.", + "Alternate Succession": + "Every occurrence of Activity A must be followed by exactly one occurrence of Activity B.", + "Immediately After": + "Activity B must directly follow Activity A whenever A occurs.", + "Immediately Before": + "Activity A must directly precede Activity B whenever B occurs.", + "Chain Succession": + "Every occurrence of Activity A must be immediately followed by B, and every B must be preceded by A.", + "Non Co-Existence": + "Activities A and B cannot both appear in the same 
trace.", + "Non Succession": "Activity A should never be followed by Activity B.", + "Non Chain Succession": "Activity B must not immediately follow Activity A.", +}; + // -------------------- Resource Options -------------------- const resourceOptions = { @@ -225,6 +278,48 @@ const resourceOptions = { ], }; +// -------------------- Resource Descriptions -------------------- +const resourceDescriptions = { + "Handover of Work": + "Measures how often one resource hands over work to another.", + Subcontracting: + "Captures indirect handovers involving intermediate activities.", + "Working together": + "Indicates joint involvement of two resources in the same case.", + "Similar Activities": + "Identifies resources that perform similar types of activities.", + "Role Discovery": + "Finds patterns in activity-resource relationships to infer roles.", + "Group Relative Focus": + "Shows how much focus a group has on specific activities.", + "Group Relative Stake": "Measures involvement of a group in different cases.", + "Group Coverage": "Indicates how many activities a group is involved in.", + "Group Member Contributions": + "Shows individual contribution level in a group.", + "Distinct Activities": "Counts unique activities per resource.", + "Distinct Activities (using PQL)": + "Same as Distinct Activities but queried via PQL.", + "Activity Frequency": + "Counts how often a specific activity is performed by a resource.", + "Activity Frequency (using PQL)": "Same using PQL query.", + "Activity Completions": "Counts completed activities by resource.", + "Activity Completions (using PQL)": "Same via PQL.", + "Case-Completions": "Counts completed cases per resource.", + "Case-Completions (using PQL)": "Same via PQL.", + "Fraction-Case Completions": "Fraction of cases completed relative to total.", + "Fraction-Case Completions (using PQL)": "Same via PQL.", + "Average workload": "Mean number of active tasks assigned to a resource.", + "Average workload (using PQL)": "Same using PQL query.", + Multitasking: "Measures how many tasks are done in parallel.", + "Average Activity Duration": + "Average duration to complete a single activity.", + "Average case duration": "Mean duration to complete a case.", + "Interaction Two Resources": + "Interaction frequency between two specific resources.", + "Interaction Two Resources (using PQL)": "Same using PQL.", + "Social Position": "A relative score indicating the influence of a resource.", +}; + // -------------------- Main Component -------------------- const ResultsPage = () => { @@ -510,7 +605,7 @@ const ResultsPage = () => { const handleSNAOrOrgOption = async (selected) => { const endpointMap = { "Handover of Work": HANDOVER_OF_WORK, - "Subcontracting": SUBCONTRACTING, + Subcontracting: SUBCONTRACTING, "Working together": WORKING_TOGETHER, "Similar Activities": SIMILAR_ACTIVITIES, "Role Discovery": ROLE_DISCOVERY, @@ -620,7 +715,7 @@ const ResultsPage = () => { "Fraction-Case Completions (using PQL)": FRACTION_CASE_COMPLETIONS_PQL, "Average workload": AVERAGE_WORKLOAD, "Average workload (using PQL)": AVERAGE_WORKLOAD_PQL, - "Multitasking": MULTITASKING, + Multitasking: MULTITASKING, "Average Activity Duration": AVERAGE_ACTIVITY_DURATION, "Average case duration": AVERAGE_CASE_DURATION, "Interaction Two Resources": INTERACTION_TWO_RESOURCES, @@ -786,10 +881,36 @@ const ResultsPage = () => { ) } disabled={!jobId} + renderValue={(value) => { + const label = + LOG_SKELETON_OPTIONS.find((opt) => opt.value === value) + ?.label || value; + return ( + + {label} + 
{logSkeletonDescriptions[label] && ( + + + + + + )} + + ); + }} > {LOG_SKELETON_OPTIONS.map((opt) => ( - {opt.label} + + {opt.label} + {logSkeletonDescriptions[opt.label] && ( + + + + + + )} + ))} @@ -819,9 +940,16 @@ const ResultsPage = () => { - + + + + + + + + {showResultLoading ? ( @@ -872,7 +1000,16 @@ const ResultsPage = () => { > {DECLARATIVE_OPTIONS.map((opt) => ( - {opt.label} + + {opt.label} + {declarativeDescriptions[opt.label] && ( + + + + + + )} + ))} @@ -985,7 +1122,6 @@ const ResultsPage = () => { onChange={(e) => { const selected = e.target.value; setSelectedResourceOption(selected); - if (selectedResourceType === "resource_profiles") { handleResourceProfileFetch(selected); } else { @@ -995,7 +1131,14 @@ const ResultsPage = () => { > {resourceOptions[selectedResourceType].map((opt) => ( - {opt} + + {opt} + + + + + + ))} From fe30b1fc7c2f0cec496d874ff1e91e599b897e28 Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Sun, 15 Jun 2025 18:27:35 +0200 Subject: [PATCH 09/18] Adding PQL queries --- backend/api/modules/declarative_router.py | 45 ++++++- .../tasks/declarative_constraints_tasks.py | 3 + .../declarative_constraints.py | 23 +++- backend/pql_queries/declarative_queries.py | 124 ++++++++++++++++++ backend/pql_queries/declerative_queries.py | 1 - 5 files changed, 193 insertions(+), 3 deletions(-) create mode 100644 backend/pql_queries/declarative_queries.py delete mode 100644 backend/pql_queries/declerative_queries.py diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index 2092865..acd9f30 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -14,6 +14,7 @@ from backend.celonis_connection.celonis_connection_manager import ( CelonisConnectionManager, ) +from backend.pql_queries import declarative_queries # **************** Type Aliases **************** @@ -33,6 +34,7 @@ async def compute_declarative_constraints( request: Request, min_support: float = Query(0.3, description="Minimum support ratio"), min_confidence: float = Query(0.75, description="Minimum confidence ratio"), + fitness_score: float = Query(1.0, description="Fitness score for the constraints"), celonis: CelonisConnectionManager = Depends(get_celonis_connection), ) -> Dict[str, str]: """Computes the declarative constraints and stores it. @@ -48,6 +50,7 @@ async def compute_declarative_constraints( Defaults to Depends(get_celonis_connection). min_support: The minimum support ratio for the constraints. min_confidence: The minimum confidence ratio for the constraints. + fitness_score: The fitness score for the constraints. Returns: A dictionary containing the job ID of the scheduled task. 
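
    With this patch the illustrative request sketched under patch 02 can also
    carry the new threshold (parameter names as declared above; the localhost
    URL remains an assumption):

        resp = requests.get(
            "http://localhost:8000/api/declarative-constraints/compute-constraints",
            params={"min_support": 0.3, "min_confidence": 0.75, "fitness_score": 1.0},
        )
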
@@ -65,12 +68,13 @@ async def compute_declarative_constraints( celonis, min_support, min_confidence, + fitness_score, ) return {"job_id": job_id} -# **************** Retrieving Declarative Model Attributes **************** +# **************** Retrieving Declarative Model Attributes - PM4PY **************** @router.get("/get_existance_violations/{job_id}") @@ -363,3 +367,42 @@ def get_nonchainsuccession_violations(job_id: str, request: Request) -> ReturnGr verify_correct_job_module(job_id, request, MODULE_NAME) return request.app.state.jobs[job_id].result.get("nonchainsuccession", []) + + +# **************** Retrieving Declarative Model Attributes - PM4PY **************** + + +@router.get("/get_always_after_pql/") +def get_always_after_pql( + request: Request, + celonis: CelonisConnectionManager = Depends(get_celonis_connection), +) -> ReturnGraphType: + """Retrieves the always-after relations via PQL. + + Args: + request: The FastAPI request object. + celonis: The CelonisManager dependency injection. + + Returns: + A JSON object with "tables" and "graphs" keys. + """ + result_df = declarative_queries.get_always_after_relation(celonis) + return result_df + + +@router.get("/get_always_before_pql/") +def get_always_before_pql( + request: Request, + celonis: CelonisConnectionManager = Depends(get_celonis_connection), +) -> ReturnGraphType: + """Retrieves the always-before relations via PQL. + + Args: + request: The FastAPI request object. + celonis: The CelonisManager dependency injection. + + Returns: + A JSON object with "tables" and "graphs" keys. + """ + result_df = declarative_queries.get_always_before_relation(celonis) + return result_df diff --git a/backend/api/tasks/declarative_constraints_tasks.py b/backend/api/tasks/declarative_constraints_tasks.py index f2072a2..e83c605 100644 --- a/backend/api/tasks/declarative_constraints_tasks.py +++ b/backend/api/tasks/declarative_constraints_tasks.py @@ -15,6 +15,7 @@ def compute_and_store_declarative_constraints( celonis: CelonisConnectionManager, min_support_ratio: float = 0.3, min_confidence_ratio: float = 0.75, + fitness_score: float = 1.0, ) -> None: """Computes the declarative constraints and stores it in the app state. @@ -24,6 +25,7 @@ def compute_and_store_declarative_constraints( celonis: The CelonisConnectionManager instance. min_support_ratio: The minimum support ratio for the constraints. min_confidence_ratio: The minimum confidence ratio for the constraints. + fitness_score: The fitness score for the constraints. """ # Get the job record from the app state rec: JobStatus = app.state.jobs[job_id] @@ -42,6 +44,7 @@ def compute_and_store_declarative_constraints( rec.result = dc.update_model_and_run_all_rules( min_support_ratio=min_support_ratio, min_confidence_ratio=min_confidence_ratio, + fitness_score=fitness_score, ) rec.status = "complete" diff --git a/backend/conformance_checking/declarative_constraints.py b/backend/conformance_checking/declarative_constraints.py index 7f8be4a..f992065 100644 --- a/backend/conformance_checking/declarative_constraints.py +++ b/backend/conformance_checking/declarative_constraints.py @@ -34,6 +34,7 @@ def __init__( log: pd.DataFrame, min_support_ratio: Optional[float] = 0.3, min_confidence_ratio: Optional[float] = 0.75, + fitness_score: Optional[float] = 1.0, case_id_col: Optional[str] = None, activity_col: Optional[str] = None, timestamp_col: Optional[str] = None, @@ -48,6 +49,8 @@ def __init__( Defaults to 0.3. min_confidence_ratio: The minimum confidence ratio for discovering rules. 
Defaults to 0.75. + fitness_score: The fitness score threshold for conformance checking. + Defaults to 1.0. case_id_col : The name of the column containing case IDs. activity_col : The name of the column containing activity names. timestamp_col : The name of the column containing timestamps. @@ -55,6 +58,7 @@ def __init__( self.log = log self.min_support_ratio = min_support_ratio self.min_confidence_ratio = min_confidence_ratio + self.fitness_score = fitness_score self.declare_model: Optional[DeclareModelType] = None self.case_id_col: Optional[str] = case_id_col self.activity_col: Optional[str] = activity_col @@ -91,6 +95,7 @@ def run_model( log: Optional[pd.DataFrame] = None, min_support_ratio: Optional[float] = None, min_confidence_ratio: Optional[float] = None, + fitness_score: Optional[float] = None, ) -> None: """Runs the declarative model on the event log. @@ -100,6 +105,7 @@ def run_model( log: The event log to use. min_support_ratio: The minimum support ratio for discovering rules. min_confidence_ratio: The minimum confidence ratio for discovering rules. + fitness_score: The fitness score threshold for conformance checking. """ if log is None: log = self.log @@ -107,6 +113,8 @@ def run_model( min_support_ratio = self.min_support_ratio if min_confidence_ratio is None: min_confidence_ratio = self.min_confidence_ratio + if fitness_score is not None: + self.fitness_score = fitness_score self.declare_model = pm4py.discover_declare( # type: ignore log, min_support_ratio=min_support_ratio, @@ -121,6 +129,7 @@ def rule_specific_violation_summary( declare_model: Optional[DeclareModelType] = None, log: Optional[pd.DataFrame] = None, rule_name: Optional[str] = None, + fitness_score: Optional[float] = None, verbose: bool = False, ) -> ReturnGraphType: """Summarizes number of violations for a declarative rule. @@ -133,6 +142,7 @@ def rule_specific_violation_summary( log: The event log. If None, uses the default log. rule_name: Name of the rule to check. verbose: Whether to print details for debugging. + fitness_score: The fitness score threshold for conformance checking. Returns: Summary with graph and table information of rule violations. @@ -146,11 +156,15 @@ def rule_specific_violation_summary( declare_model = self.declare_model if log is None: log = self.log + if fitness_score is not None: + self.fitness_score = fitness_score if str(rule_name) not in self.valid_rules: raise ValueError( f"Unsupported rule: '{rule_name}'. Must be one of: {self.valid_rules}" ) + if self.fitness_score is None: + self.fitness_score = 1.0 if declare_model is None: raise ValueError("Declare model is stil None. Something has gone wrong.") @@ -171,7 +185,9 @@ def rule_specific_violation_summary( else: A, B = rule_key, None # type: ignore diagnostics = decl_conf.apply(log, {rule_name: {(A, B): rule_info}}) # type: ignore - violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore + violated = [ + d for d in diagnostics if d["dev_fitness"] < self.fitness_score + ] # type: ignore violation_count = len(violated) # type: ignore if violation_count > 0: @@ -410,6 +426,7 @@ def update_model_and_run_all_rules( log: Optional[pd.DataFrame] = None, min_support_ratio: Optional[float] = None, min_confidence_ratio: Optional[float] = None, + fitness_score: Optional[float] = 1.0, list_of_rules: Optional[List[str]] = None, run_from_scratch: Optional[bool] = False, ) -> Any: @@ -423,6 +440,7 @@ def update_model_and_run_all_rules( valid rules. run_from_scratch: If True, re-evaluates all rules even if results stored. 
+ fitness_score: The fitness score threshold for conformance checking. Returns: Dictionary of all violations. @@ -435,11 +453,14 @@ def update_model_and_run_all_rules( min_confidence_ratio = self.min_confidence_ratio if list_of_rules is None: list_of_rules = self.valid_rules + if fitness_score is not None: + self.fitness_score = fitness_score self.run_model( log=log, min_support_ratio=min_support_ratio, min_confidence_ratio=min_confidence_ratio, + fitness_score=fitness_score, ) for rule in list_of_rules: self.temp = self.get_declarative_conformance_diagnostics( diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py new file mode 100644 index 0000000..5156a33 --- /dev/null +++ b/backend/pql_queries/declarative_queries.py @@ -0,0 +1,124 @@ +"""Queries that can be used to get log-skeleton related data from celonis.""" + +from itertools import combinations +from typing import Dict, List, TypeAlias, Union + +from pandas import DataFrame +import pandas as pd + +from backend.celonis_connection.celonis_connection_manager import ( + CelonisConnectionManager, +) +from backend.pql_queries.general_queries import get_activities + +# **************** Type Aliases **************** + +TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]] +GraphType: TypeAlias = Dict[str, List[Dict[str, str]]] +ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]] + +# **************** Formatting Function **************** + +def format_graph_and_table (curr_df: pd.DataFrame) -> ReturnGraphType: + """Formats the DataFrame into a graph and table structure. + + Args: + curr_df (pd.DataFrame): The DataFrame to format. + + Returns: + ReturnGraphType: A dictionary containing the formatted graph and table. + """ + output: ReturnGraphType = {"graphs": [], "tables": []} + + if not curr_df.empty : + if curr_df.shape[1] == 3 : + nodes = [] + edges = [] + for i, row in curr_df.iterrows(): # type: ignore + nodes.append(str(row[curr_df.columns[0]])) # type: ignore + nodes.append(str(row[curr_df.columns[1]])) # type: ignore + edges.append({ # type: ignore + 'from': str(row[curr_df.columns[0]]), # type: ignore + 'to': str(row[curr_df.columns[1]]), # type: ignore + 'label': str(row[curr_df.columns[2]]) # type: ignore + }) + + output["graphs"].append({ + "nodes": list(set(list(nodes))), # type: ignore + "edges": edges + }) + + nodes = [{'id': str(ele)} for ele in list(set(list(nodes)))] # type: ignore + headers = list(curr_df.columns) + rows = curr_df.values.tolist() # type: ignore + output["tables"].append({ + "headers": headers, # type: ignore + "rows": [[str(ele) for ele in row] for row in rows] # type: ignore + }) + else : + headers = list(curr_df.columns) + rows = curr_df.values.tolist() # type: ignore + output["tables"].append({ + "headers": headers, # type: ignore + "rows": [[str(ele) for ele in row] for row in rows] # type: ignore + }) + return output + +# **************** PQL Functions **************** + +# Always before +def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraphType: + """Compute Always-Before summary using PQL. + + Args: + celonis (CelonisConnectionManager): the celonis connection + + Returns: + ReturnGraphType: A dictionary containing the formatted graph and table. 
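+
+    Example (illustrative; the activities returned depend entirely on the
+    connected Celonis data model, and the table is only present when at
+    least one relation is found):
+
+        >>> summary = get_always_before_relation(celonis)
+        >>> summary["tables"][0]["headers"]
+        ['Activity A', 'Activity B', '# Occurrences']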
+ """ + target_df: pd.DataFrame = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) + act_table = get_activities(celonis) # type: ignore + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore + query = { + "A before B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, + NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + "B before A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, + NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + } + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B before A"] == 1).any() and not (pair_df["A before B"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[1], pair[0], int((pair_df["B before A"] == 1).sum())] # type: ignore + elif (pair_df["A before B"] == 1).any() and not (pair_df["B before A"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[0], pair[1], int((pair_df["A before B"] == 1).sum())] # type: ignore + output = format_graph_and_table(target_df) + return output + + +# Always after +def get_always_after_relation(celonis: CelonisConnectionManager) -> ReturnGraphType: + """Compute Always-After summary using PQL. + + Args: + celonis (CelonisConnectionManager): the celonis connection + + Returns: + ReturnGraphType: A dictionary containing the formatted graph and table. + """ + target_df = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) + act_table = get_activities(celonis) + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore + query = { + "A after B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, + NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + "B after A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, + NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + } + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B after A"] == 1).any() and not (pair_df["A after B"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[1], pair[0], int((pair_df["B after A"] == 1).sum())] # type: ignore + elif (pair_df["A after B"] == 1).any() and not (pair_df["B after A"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[0], pair[1], int((pair_df["A after B"] == 1).sum())] # type: ignore + output = format_graph_and_table(target_df) + return output \ No newline at end of file diff --git a/backend/pql_queries/declerative_queries.py b/backend/pql_queries/declerative_queries.py deleted file mode 100644 index 79ae0ec..0000000 --- a/backend/pql_queries/declerative_queries.py +++ /dev/null @@ -1 +0,0 @@ -"""Queries used to get declerative constraint related data from celonis.""" From 1917b277bf14db53e702a8177a7f7942003b84a8 Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 18:35:26 +0200 Subject: [PATCH 10/18] Added new button for Declarative --- frontend/src/ResultsPage.js | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index 59ec500..a54367a 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -346,6 +346,7 @@ const ResultsPage = () => { // Declarative Constraints const [minSupport, setMinSupport] = useState(""); const [minConfidence, setMinConfidence] = 
useState(""); + const [zetaValue, setZetaValue] = useState(""); const [declJobId, setDeclJobId] = useState(null); const [selectedDeclOption, setSelectedDeclOption] = useState(""); const [declLoading, setDeclLoading] = useState(false); @@ -548,9 +549,14 @@ const ResultsPage = () => { } try { - const url = `${COMPUTE_DECLARATIVE_CONSTRAINTS}?min_support=${parseFloat( - minSupport - )}&min_confidence=${parseFloat(minConfidence)}`; + const queryParams = new URLSearchParams({ + min_support: parseFloat(minSupport), + min_confidence: parseFloat(minConfidence), + }); + if (zetaValue) { + queryParams.append("zeta", parseFloat(zetaValue)); + } + const url = `${COMPUTE_DECLARATIVE_CONSTRAINTS}?${queryParams.toString()}`; const res = await fetch(url, { method: "GET" }); const data = await res.json(); setDeclJobId(data.job_id); @@ -980,6 +986,14 @@ const ResultsPage = () => { onChange={(e) => setMinConfidence(e.target.value)} fullWidth /> + setZetaValue(e.target.value)} + fullWidth + /> From f8190acb60063d3d4993dbb40fb5f20a86e45aa3 Mon Sep 17 00:00:00 2001 From: Yash Raj Date: Sun, 15 Jun 2025 19:12:17 +0200 Subject: [PATCH 11/18] Fixed TypeAlias Issues --- backend/api/modules/declarative_router.py | 6 +++--- backend/pql_queries/declarative_queries.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index acd9f30..b8e35a0 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -1,7 +1,7 @@ """Contains the routes for handling declarative constraints.""" import uuid -from typing import Dict, List, TypeAlias, Union +from typing import Dict, List, TypeAlias, Union, Any from fastapi import APIRouter, BackgroundTasks, Depends, Request, Query @@ -376,7 +376,7 @@ def get_nonchainsuccession_violations(job_id: str, request: Request) -> ReturnGr def get_always_after_pql( request: Request, celonis: CelonisConnectionManager = Depends(get_celonis_connection), -) -> ReturnGraphType: +) -> Dict[str, Union[List[TableType], List[GraphType]]]: """Retrieves the always-after relations via PQL. Args: @@ -394,7 +394,7 @@ def get_always_after_pql( def get_always_before_pql( request: Request, celonis: CelonisConnectionManager = Depends(get_celonis_connection), -) -> ReturnGraphType: +) -> Dict[str, Union[List[TableType], List[GraphType]]]: """Retrieves the always-before relations via PQL. 
Args: diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py index 5156a33..9993b0f 100644 --- a/backend/pql_queries/declarative_queries.py +++ b/backend/pql_queries/declarative_queries.py @@ -42,13 +42,13 @@ def format_graph_and_table (curr_df: pd.DataFrame) -> ReturnGraphType: 'to': str(row[curr_df.columns[1]]), # type: ignore 'label': str(row[curr_df.columns[2]]) # type: ignore }) - + + nodes = [{'id': str(ele)} for ele in list(set(list(nodes)))] # type: ignore output["graphs"].append({ - "nodes": list(set(list(nodes))), # type: ignore + "nodes": nodes, # type: ignore "edges": edges }) - - nodes = [{'id': str(ele)} for ele in list(set(list(nodes)))] # type: ignore + headers = list(curr_df.columns) rows = curr_df.values.tolist() # type: ignore output["tables"].append({ From 4a276c590fcf36bf2aad7424503c7f6ad89660c5 Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 20:31:15 +0200 Subject: [PATCH 12/18] Updated info pages for temporal and resource based --- frontend/src/ResultsPage.js | 138 +++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 33 deletions(-) diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index a54367a..a7403cc 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -281,43 +281,59 @@ const resourceOptions = { // -------------------- Resource Descriptions -------------------- const resourceDescriptions = { "Handover of Work": - "Measures how often one resource hands over work to another.", + "The Handover of Work metric measures how many times an individual is followed by another individual in the execution of a business process.", Subcontracting: - "Captures indirect handovers involving intermediate activities.", + "The Subcontracting metric calculates how many times the work of an individual is interleaved by the work of another individual, only to eventually “return” to the original individual.", "Working together": - "Indicates joint involvement of two resources in the same case.", + "The Working Together metric calculates how many times two individuals work together to resolve a process instance.", "Similar Activities": - "Identifies resources that perform similar types of activities.", + "The Similar Activities metric calculates how similar the work patterns are between two individuals.", "Role Discovery": - "Finds patterns in activity-resource relationships to infer roles.", + "The organizational role is a set of activities in the log that are executed by a similar (multi)set of resources.", "Group Relative Focus": - "Shows how much focus a group has on specific activities.", - "Group Relative Stake": "Measures involvement of a group in different cases.", - "Group Coverage": "Indicates how many activities a group is involved in.", + "The Group Relative Focus metric specifies for a given work how much a resource group performed this type of work compared to the overall workload of the group. It can be used to measure how the workload of a resource group is distributed over different types of work, i.e., work diversification of the group.", + "Group Relative Stake": + "The Group Relative Stake metric specifies for a given work how much this type of work was performed by a certain resource group among all groups. 
It can be used to measure how the workload devoted to a certain type of work is distributed over resource groups in an organizational model, i.e., work participation by different groups.", + "Group Coverage": + "The Group Coverage metric with respect to a given type of work, specifies the proportion of members of a resource group that performed this type of work.", "Group Member Contributions": - "Shows individual contribution level in a group.", - "Distinct Activities": "Counts unique activities per resource.", + "The Group Member Contribution metric of a member of a resource group with respect to a given type of work specifies how much of this type of work by the group was performed by the member. It can be used to measure how the workload of the entire group devoted to a certain type of work is distributed over the group members.", + "Distinct Activities": + "Number of distinct activities done by a resource in a given time interval [t1, t2).", "Distinct Activities (using PQL)": - "Same as Distinct Activities but queried via PQL.", + "Number of distinct activities done by a resource in a given time interval [t1, t2) using PQL Queries.", "Activity Frequency": - "Counts how often a specific activity is performed by a resource.", - "Activity Frequency (using PQL)": "Same using PQL query.", - "Activity Completions": "Counts completed activities by resource.", - "Activity Completions (using PQL)": "Same via PQL.", - "Case-Completions": "Counts completed cases per resource.", - "Case-Completions (using PQL)": "Same via PQL.", - "Fraction-Case Completions": "Fraction of cases completed relative to total.", - "Fraction-Case Completions (using PQL)": "Same via PQL.", - "Average workload": "Mean number of active tasks assigned to a resource.", - "Average workload (using PQL)": "Same using PQL query.", - Multitasking: "Measures how many tasks are done in parallel.", + "Fraction of completions of a given activity a by a given resource r during a given time slot [t1, t2), with respect to the total number of activity completions by resource r during [t1, t2).", + "Activity Frequency (using PQL)": + "Fraction of completions of a given activity a by a given resource r during a given time slot [t1, t2), with respect to the total number of activity completions by resource r during [t1, t2) using PQL queries.", + "Activity Completions": + "The number of activity instances completed by a given resource during a given time slot.", + "Activity Completions (using PQL)": + "The number of activity instances completed by a given resource during a given time slot using PQL queries.", + "Case-Completions": + "The number of cases completed during a given time slot in which a given resource was involved.", + "Case-Completions (using PQL)": + "The number of cases completed during a given time slot in which a given resource was involved using PQL queries.", + "Fraction-Case Completions": + "The fraction of cases completed during a given time slot in which a given resource was involved with respect to the total number of cases completed during the time slot.", + "Fraction-Case Completions (using PQL)": + "The fraction of cases completed during a given time slot in which a given resource was involved with respect to the total number of cases completed during the time slot using PQL queries.", + "Average workload": + "The average number of activities started by a given resource but not completed at a moment in time.", + "Average workload (using PQL)": + "The average number of activities started by a given resource but not 
completed at a moment in time using PQL queries.", + Multitasking: + "The fraction of active time during which a given resource is involved in more than one activity with respect to the resource's active time.", "Average Activity Duration": - "Average duration to complete a single activity.", - "Average case duration": "Mean duration to complete a case.", + "The average duration of instances of a given activity completed during a given time slot by a given resource.", + "Average case duration": + "The average duration of cases completed during a given time slot in which a given resource was involved.", "Interaction Two Resources": - "Interaction frequency between two specific resources.", - "Interaction Two Resources (using PQL)": "Same using PQL.", - "Social Position": "A relative score indicating the influence of a resource.", + "The number of cases completed during a given time slot in which two given resources were involved.", + "Interaction Two Resources (using PQL)": + "The number of cases completed during a given time slot in which two given resources were involved using PQL queries.", + "Social Position": + "The fraction of resources involved in the same cases with a given resource during a given time slot with respect to the total number of resources active during the time slot.", }; // -------------------- Main Component -------------------- @@ -895,7 +911,21 @@ const ResultsPage = () => { {label} {logSkeletonDescriptions[label] && ( - + + {logSkeletonDescriptions[label]} + + } + arrow + placement="right" + > @@ -910,7 +940,21 @@ const ResultsPage = () => { {opt.label} {logSkeletonDescriptions[opt.label] && ( - + + {logSkeletonDescriptions[opt.label]} + + } + arrow + placement="right" + > @@ -950,9 +994,22 @@ const ResultsPage = () => { - - - + + The temporal conformance results consist of the source + activity of the recorded deviation, the target activity of + the recorded deviation, the time passed between the + occurrence of the source activity and the target activity, + and the value of (time passed - mean)/std for this + occurrence (zeta). 
+ + } + placement="right" + arrow + > + + @@ -1147,7 +1204,22 @@ const ResultsPage = () => { {opt} - + + {resourceDescriptions[opt] || + "No description available."} + + } + arrow + placement="right" + > From 920fdbc965cda5bb67451b8b2af08f37843bd6a1 Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 21:08:09 +0200 Subject: [PATCH 13/18] Updated page for declarative and info buttons update --- frontend/src/ResultsPage.js | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index a7403cc..3407c60 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -45,7 +45,9 @@ import { GET_RESPONDED_EXISTENCE_VIOLATIONS, GET_COEXISTENCE_VIOLATIONS, GET_RESPONSE_VIOLATIONS, + GET_DECL_ALWAYS_AFTER_PQL, GET_PRECEDENCE_VIOLATIONS, + GET_DECL_ALWAYS_BEFORE_PQL, GET_SUCCESSION_VIOLATIONS, GET_ALTPRECEDENCE_VIOLATIONS, GET_ALTSUCCESION_VIOLATIONS, @@ -174,7 +176,9 @@ const DECLARATIVE_OPTIONS = [ }, { label: "Co-Existence", endpoint: GET_COEXISTENCE_VIOLATIONS }, { label: "Always After", endpoint: GET_RESPONSE_VIOLATIONS }, + { label: "Always After (PQL)", endpoint: GET_DECL_ALWAYS_AFTER_PQL }, { label: "Always Before", endpoint: GET_PRECEDENCE_VIOLATIONS }, + { label: "Always Before (PQL)", endpoint: GET_DECL_ALWAYS_BEFORE_PQL }, { label: "Succession", endpoint: GET_SUCCESSION_VIOLATIONS }, { label: "Alternate Precedence", @@ -220,8 +224,12 @@ const declarativeDescriptions = { "Activities A and B must either both occur or both be absent in a trace.", "Always After": "If Activity A occurs, Activity B must follow it at some point.", + "Always After (PQL)": + "If Activity A occurs, Activity B must follow it at some point using PQL queries.", "Always Before": "If Activity B occurs, Activity A must have occurred before it.", + "Always Before (PQL)": + "If Activity B occurs, Activity A must have occurred before it using PQL queries.", Succession: "If Activity A occurs, then Activity B must occur afterwards, and vice versa.", "Alternate Precedence": @@ -585,7 +593,8 @@ const ResultsPage = () => { }; const handleDeclarativeOptionSelect = async (option) => { - if (!declJobId) { + const isPQL = option.label.endsWith("(PQL)"); + if (!isPQL && !declJobId) { alert("Please compute constraints first."); return; } @@ -598,8 +607,11 @@ const ResultsPage = () => { try { let attempts = 0; let resultData = null; + const endpoint = option.endpoint; + const fetchURL = isPQL ? 
endpoint : `${endpoint}/${declJobId}`; + while (attempts < 20) { - const res = await fetch(`${option.endpoint}/${declJobId}`); + const res = await fetch(fetchURL); if (res.ok) { resultData = await res.json(); break; @@ -1074,7 +1086,21 @@ const ResultsPage = () => { {opt.label} {declarativeDescriptions[opt.label] && ( - + + {declarativeDescriptions[opt.label]} + + } + arrow + placement="right" + > From 0a6aee33543f7376e82d10dfe3e5b5d9ff92737f Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 21:14:14 +0200 Subject: [PATCH 14/18] Yash's Declarative constraints changes with PQL queries --- backend/api/modules/declarative_router.py | 45 ++++++- .../tasks/declarative_constraints_tasks.py | 3 + .../declarative_constraints.py | 23 +++- backend/pql_queries/declarative_queries.py | 124 ++++++++++++++++++ backend/pql_queries/declerative_queries.py | 1 - frontend/src/config.js | 4 +- 6 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 backend/pql_queries/declarative_queries.py delete mode 100644 backend/pql_queries/declerative_queries.py diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index 2092865..7d1167a 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -14,6 +14,7 @@ from backend.celonis_connection.celonis_connection_manager import ( CelonisConnectionManager, ) +from backend.pql_queries import declarative_queries # **************** Type Aliases **************** @@ -33,6 +34,7 @@ async def compute_declarative_constraints( request: Request, min_support: float = Query(0.3, description="Minimum support ratio"), min_confidence: float = Query(0.75, description="Minimum confidence ratio"), + fitness_score: float = Query(1.0, description="Fitness score for the constraints"), celonis: CelonisConnectionManager = Depends(get_celonis_connection), ) -> Dict[str, str]: """Computes the declarative constraints and stores it. @@ -48,6 +50,7 @@ async def compute_declarative_constraints( Defaults to Depends(get_celonis_connection). min_support: The minimum support ratio for the constraints. min_confidence: The minimum confidence ratio for the constraints. + fitness_score: The fitness score for the constraints. Returns: A dictionary containing the job ID of the scheduled task. @@ -65,12 +68,13 @@ async def compute_declarative_constraints( celonis, min_support, min_confidence, + fitness_score, ) return {"job_id": job_id} -# **************** Retrieving Declarative Model Attributes **************** +# **************** Retrieving Declarative Model Attributes - PM4PY **************** @router.get("/get_existance_violations/{job_id}") @@ -363,3 +367,42 @@ def get_nonchainsuccession_violations(job_id: str, request: Request) -> ReturnGr verify_correct_job_module(job_id, request, MODULE_NAME) return request.app.state.jobs[job_id].result.get("nonchainsuccession", []) + + +# **************** Retrieving Declarative Model Attributes - PQL Queries **************** + + +@router.get("/get_always_after_pql/") +def get_always_after_pql( + request: Request, + celonis: CelonisConnectionManager = Depends(get_celonis_connection), +) -> Dict[str, Union[List[TableType], List[GraphType]]]: + """Retrieves the always-after relations via PQL. + + Args: + request: The FastAPI request object. + celonis: The CelonisManager dependency injection. + + Returns: + A JSON object with "tables" and "graphs" keys. 
+ """ + result_df = declarative_queries.get_always_after_relation(celonis) + return result_df + + +@router.get("/get_always_before_pql/") +def get_always_before_pql( + request: Request, + celonis: CelonisConnectionManager = Depends(get_celonis_connection), +) -> Dict[str, Union[List[TableType], List[GraphType]]]: + """Retrieves the always-before relations via PQL. + + Args: + request: The FastAPI request object. + celonis: The CelonisManager dependency injection. + + Returns: + A JSON object with "tables" and "graphs" keys. + """ + result_df = declarative_queries.get_always_before_relation(celonis) + return result_df diff --git a/backend/api/tasks/declarative_constraints_tasks.py b/backend/api/tasks/declarative_constraints_tasks.py index f2072a2..e83c605 100644 --- a/backend/api/tasks/declarative_constraints_tasks.py +++ b/backend/api/tasks/declarative_constraints_tasks.py @@ -15,6 +15,7 @@ def compute_and_store_declarative_constraints( celonis: CelonisConnectionManager, min_support_ratio: float = 0.3, min_confidence_ratio: float = 0.75, + fitness_score: float = 1.0, ) -> None: """Computes the declarative constraints and stores it in the app state. @@ -24,6 +25,7 @@ def compute_and_store_declarative_constraints( celonis: The CelonisConnectionManager instance. min_support_ratio: The minimum support ratio for the constraints. min_confidence_ratio: The minimum confidence ratio for the constraints. + fitness_score: The fitness score for the constraints. """ # Get the job record from the app state rec: JobStatus = app.state.jobs[job_id] @@ -42,6 +44,7 @@ def compute_and_store_declarative_constraints( rec.result = dc.update_model_and_run_all_rules( min_support_ratio=min_support_ratio, min_confidence_ratio=min_confidence_ratio, + fitness_score=fitness_score, ) rec.status = "complete" diff --git a/backend/conformance_checking/declarative_constraints.py b/backend/conformance_checking/declarative_constraints.py index 7f8be4a..f992065 100644 --- a/backend/conformance_checking/declarative_constraints.py +++ b/backend/conformance_checking/declarative_constraints.py @@ -34,6 +34,7 @@ def __init__( log: pd.DataFrame, min_support_ratio: Optional[float] = 0.3, min_confidence_ratio: Optional[float] = 0.75, + fitness_score: Optional[float] = 1.0, case_id_col: Optional[str] = None, activity_col: Optional[str] = None, timestamp_col: Optional[str] = None, @@ -48,6 +49,8 @@ def __init__( Defaults to 0.3. min_confidence_ratio: The minimum confidence ratio for discovering rules. Defaults to 0.75. + fitness_score: The fitness score threshold for conformance checking. + Defaults to 1.0. case_id_col : The name of the column containing case IDs. activity_col : The name of the column containing activity names. timestamp_col : The name of the column containing timestamps. @@ -55,6 +58,7 @@ def __init__( self.log = log self.min_support_ratio = min_support_ratio self.min_confidence_ratio = min_confidence_ratio + self.fitness_score = fitness_score self.declare_model: Optional[DeclareModelType] = None self.case_id_col: Optional[str] = case_id_col self.activity_col: Optional[str] = activity_col @@ -91,6 +95,7 @@ def run_model( log: Optional[pd.DataFrame] = None, min_support_ratio: Optional[float] = None, min_confidence_ratio: Optional[float] = None, + fitness_score: Optional[float] = None, ) -> None: """Runs the declarative model on the event log. @@ -100,6 +105,7 @@ def run_model( log: The event log to use. min_support_ratio: The minimum support ratio for discovering rules. 
min_confidence_ratio: The minimum confidence ratio for discovering rules.
+ fitness_score: The fitness score threshold for conformance checking.
 """
 if log is None:
 log = self.log
@@ -107,6 +113,8 @@
 min_support_ratio = self.min_support_ratio
 if min_confidence_ratio is None:
 min_confidence_ratio = self.min_confidence_ratio
+ if fitness_score is not None:
+ self.fitness_score = fitness_score
 self.declare_model = pm4py.discover_declare( # type: ignore
 log,
 min_support_ratio=min_support_ratio,
@@ -121,6 +129,7 @@
 declare_model: Optional[DeclareModelType] = None,
 log: Optional[pd.DataFrame] = None,
 rule_name: Optional[str] = None,
+ fitness_score: Optional[float] = None,
 verbose: bool = False,
 ) -> ReturnGraphType:
 """Summarizes number of violations for a declarative rule.
@@ -133,6 +142,7 @@
 log: The event log. If None, uses the default log.
 rule_name: Name of the rule to check.
 verbose: Whether to print details for debugging.
+ fitness_score: The fitness score threshold for conformance checking.

 Returns:
 Summary with graph and table information of rule violations.
@@ -146,11 +156,15 @@
 declare_model = self.declare_model
 if log is None:
 log = self.log
+ if fitness_score is not None:
+ self.fitness_score = fitness_score

 if str(rule_name) not in self.valid_rules:
 raise ValueError(
 f"Unsupported rule: '{rule_name}'. Must be one of: {self.valid_rules}"
 )
+ if self.fitness_score is None:
+ self.fitness_score = 1.0

 if declare_model is None:
 raise ValueError("Declare model is still None. Something has gone wrong.")

@@ -171,7 +185,9 @@
 else:
 A, B = rule_key, None # type: ignore
 diagnostics = decl_conf.apply(log, {rule_name: {(A, B): rule_info}}) # type: ignore
- violated = [d for d in diagnostics if d["dev_fitness"] < 1.0] # type: ignore
+ violated = [
+ d for d in diagnostics if d["dev_fitness"] < self.fitness_score
+ ] # type: ignore
 violation_count = len(violated) # type: ignore

 if violation_count > 0:
@@ -410,6 +426,7 @@
 log: Optional[pd.DataFrame] = None,
 min_support_ratio: Optional[float] = None,
 min_confidence_ratio: Optional[float] = None,
+ fitness_score: Optional[float] = 1.0,
 list_of_rules: Optional[List[str]] = None,
 run_from_scratch: Optional[bool] = False,
 ) -> Any:
@@ -423,6 +440,7 @@
 valid rules.
 run_from_scratch: If True, re-evaluates all rules even if results
 stored.
+ fitness_score: The fitness score threshold for conformance checking.

 Returns:
 Dictionary of all violations.
@@ -435,11 +453,14 @@
 min_confidence_ratio = self.min_confidence_ratio
 if list_of_rules is None:
 list_of_rules = self.valid_rules
+ if fitness_score is not None:
+ self.fitness_score = fitness_score

 self.run_model(
 log=log,
 min_support_ratio=min_support_ratio,
 min_confidence_ratio=min_confidence_ratio,
+ fitness_score=fitness_score,
 )
 for rule in list_of_rules:
 self.temp = self.get_declarative_conformance_diagnostics(
diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py
new file mode 100644
index 0000000..9993b0f
--- /dev/null
+++ b/backend/pql_queries/declarative_queries.py
@@ -0,0 +1,124 @@
+"""Queries that can be used to get declarative constraint related data from celonis."""
+
+from itertools import combinations
+from typing import Dict, List, TypeAlias, Union
+
+from pandas import DataFrame
+import pandas as pd
+
+from backend.celonis_connection.celonis_connection_manager import (
+ CelonisConnectionManager,
+)
+from backend.pql_queries.general_queries import get_activities
+
+# **************** Type Aliases ****************
+
+TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]]
+GraphType: TypeAlias = Dict[str, List[Dict[str, str]]]
+ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]]
+
+# **************** Formatting Function ****************
+
+def format_graph_and_table (curr_df: pd.DataFrame) -> ReturnGraphType:
+ """Formats the DataFrame into a graph and table structure.
+
+ Args:
+ curr_df (pd.DataFrame): The DataFrame to format.
+
+ Returns:
+ ReturnGraphType: A dictionary containing the formatted graph and table.
+ """
+ output: ReturnGraphType = {"graphs": [], "tables": []}
+
+ if not curr_df.empty :
+ if curr_df.shape[1] == 3 :
+ nodes = []
+ edges = []
+ for i, row in curr_df.iterrows(): # type: ignore
+ nodes.append(str(row[curr_df.columns[0]])) # type: ignore
+ nodes.append(str(row[curr_df.columns[1]])) # type: ignore
+ edges.append({ # type: ignore
+ 'from': str(row[curr_df.columns[0]]), # type: ignore
+ 'to': str(row[curr_df.columns[1]]), # type: ignore
+ 'label': str(row[curr_df.columns[2]]) # type: ignore
+ })
+
+ nodes = [{'id': str(ele)} for ele in list(set(list(nodes)))] # type: ignore
+ output["graphs"].append({
+ "nodes": nodes, # type: ignore
+ "edges": edges
+ })
+
+ headers = list(curr_df.columns)
+ rows = curr_df.values.tolist() # type: ignore
+ output["tables"].append({
+ "headers": headers, # type: ignore
+ "rows": [[str(ele) for ele in row] for row in rows] # type: ignore
+ })
+ else :
+ headers = list(curr_df.columns)
+ rows = curr_df.values.tolist() # type: ignore
+ output["tables"].append({
+ "headers": headers, # type: ignore
+ "rows": [[str(ele) for ele in row] for row in rows] # type: ignore
+ })
+ return output
+
+# **************** PQL Functions ****************
+
+# Always before
+def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraphType:
+ """Compute Always-Before summary using PQL.
+
+ Args:
+ celonis (CelonisConnectionManager): the celonis connection
+
+ Returns:
+ ReturnGraphType: A dictionary containing the formatted graph and table.
+ """ + target_df: pd.DataFrame = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) + act_table = get_activities(celonis) # type: ignore + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore + query = { + "A before B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, + NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + "B before A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, + NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + } + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B before A"] == 1).any() and not (pair_df["A before B"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[1], pair[0], int((pair_df["B before A"] == 1).sum())] # type: ignore + elif (pair_df["A before B"] == 1).any() and not (pair_df["B before A"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[0], pair[1], int((pair_df["A before B"] == 1).sum())] # type: ignore + output = format_graph_and_table(target_df) + return output + + +# Always after +def get_always_after_relation(celonis: CelonisConnectionManager) -> ReturnGraphType: + """Compute Always-After summary using PQL. + + Args: + celonis (CelonisConnectionManager): the celonis connection + + Returns: + ReturnGraphType: A dictionary containing the formatted graph and table. + """ + target_df = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) + act_table = get_activities(celonis) + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore + query = { + "A after B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, + NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + "B after A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, + NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", + } + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B after A"] == 1).any() and not (pair_df["A after B"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[1], pair[0], int((pair_df["B after A"] == 1).sum())] # type: ignore + elif (pair_df["A after B"] == 1).any() and not (pair_df["B after A"] == 1).any(): # type: ignore + target_df.loc[i] = [pair[0], pair[1], int((pair_df["A after B"] == 1).sum())] # type: ignore + output = format_graph_and_table(target_df) + return output \ No newline at end of file diff --git a/backend/pql_queries/declerative_queries.py b/backend/pql_queries/declerative_queries.py deleted file mode 100644 index 79ae0ec..0000000 --- a/backend/pql_queries/declerative_queries.py +++ /dev/null @@ -1 +0,0 @@ -"""Queries used to get declerative constraint related data from celonis.""" diff --git a/frontend/src/config.js b/frontend/src/config.js index 24bd617..286a9e5 100644 --- a/frontend/src/config.js +++ b/frontend/src/config.js @@ -34,7 +34,7 @@ export const GET_EXISTANCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints export const GET_ABSENCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_absence_violations`; export const GET_EXACTLY_ONE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_exactly_one_violations`; export const GET_INIT_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_init_violations`; -export const GET_RESPONDED_EXISTENCE_VIOLATIONS = 
`${API_BASE}/api/declarative-constraints/get_responded_existence_violations`; +export const GET_RESPONDED_EXISTENCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_responded_existence_violations`; export const GET_COEXISTENCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_coexistence_violations`; export const GET_RESPONSE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_response_violations`; export const GET_PRECEDENCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_precedence_violations`; @@ -47,6 +47,8 @@ export const GET_CHAINSUCCESION_VIOLATIONS = `${API_BASE}/api/declarative-constr export const GET_NONCOEXISTENCE_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_noncoexistence_violations`; export const GET_NONSUCCESION_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_nonsuccession_violations`; export const GET_NONCHAINSUCCESION_VIOLATIONS = `${API_BASE}/api/declarative-constraints/get_nonchainsuccession_violations`; +export const GET_DECL_ALWAYS_AFTER_PQL = `${API_BASE}/api/declarative-constraints/get_always_after_pql`; +export const GET_DECL_ALWAYS_BEFORE_PQL = `${API_BASE}/api/declarative-constraints/get_always_before_pql`; // Temporal Profile Endpoints export const TEMPORAL_PROFILE = `${API_BASE}/api/temporal-profile/compute-result`; From a8e94e12b2b9286ce7f584035e4be864a685a1c9 Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 21:30:30 +0200 Subject: [PATCH 15/18] Updated url for declarative pql --- backend/pql_queries/declarative_queries.py | 135 +++++++++++++-------- frontend/src/ResultsPage.js | 4 +- 2 files changed, 87 insertions(+), 52 deletions(-) diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py index 9993b0f..ac8f773 100644 --- a/backend/pql_queries/declarative_queries.py +++ b/backend/pql_queries/declarative_queries.py @@ -19,7 +19,8 @@ # **************** Formatting Function **************** -def format_graph_and_table (curr_df: pd.DataFrame) -> ReturnGraphType: + +def format_graph_and_table(curr_df: pd.DataFrame) -> ReturnGraphType: """Formats the DataFrame into a graph and table structure. Args: @@ -28,44 +29,54 @@ def format_graph_and_table (curr_df: pd.DataFrame) -> ReturnGraphType: Returns: ReturnGraphType: A dictionary containing the formatted graph and table. 
""" - output: ReturnGraphType = {"graphs": [], "tables": []} + output: ReturnGraphType = {"graphs": [], "tables": []} - if not curr_df.empty : - if curr_df.shape[1] == 3 : + if not curr_df.empty: + if curr_df.shape[1] == 3: nodes = [] edges = [] - for i, row in curr_df.iterrows(): # type: ignore + for i, row in curr_df.iterrows(): # type: ignore nodes.append(str(row[curr_df.columns[0]])) # type: ignore nodes.append(str(row[curr_df.columns[1]])) # type: ignore - edges.append({ # type: ignore - 'from': str(row[curr_df.columns[0]]), # type: ignore - 'to': str(row[curr_df.columns[1]]), # type: ignore - 'label': str(row[curr_df.columns[2]]) # type: ignore - }) - - nodes = [{'id': str(ele)} for ele in list(set(list(nodes)))] # type: ignore - output["graphs"].append({ - "nodes": nodes, # type: ignore - "edges": edges - }) - - headers = list(curr_df.columns) - rows = curr_df.values.tolist() # type: ignore - output["tables"].append({ - "headers": headers, # type: ignore - "rows": [[str(ele) for ele in row] for row in rows] # type: ignore - }) - else : - headers = list(curr_df.columns) - rows = curr_df.values.tolist() # type: ignore - output["tables"].append({ - "headers": headers, # type: ignore - "rows": [[str(ele) for ele in row] for row in rows] # type: ignore - }) + edges.append( + { # type: ignore + "from": str(row[curr_df.columns[0]]), # type: ignore + "to": str(row[curr_df.columns[1]]), # type: ignore + "label": str(row[curr_df.columns[2]]), # type: ignore + } + ) + + nodes = [{"id": str(ele)} for ele in list(set(list(nodes)))] # type: ignore + output["graphs"].append( + { + "nodes": nodes, # type: ignore + "edges": edges, + } + ) + + headers = list(curr_df.columns) + rows = curr_df.values.tolist() # type: ignore + output["tables"].append( + { + "headers": headers, # type: ignore + "rows": [[str(ele) for ele in row] for row in rows], # type: ignore + } + ) + else: + headers = list(curr_df.columns) + rows = curr_df.values.tolist() # type: ignore + output["tables"].append( + { + "headers": headers, # type: ignore + "rows": [[str(ele) for ele in row] for row in rows], # type: ignore + } + ) return output + # **************** PQL Functions **************** + # Always before def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraphType: """Compute Always-Before summary using PQL. @@ -76,23 +87,37 @@ def get_always_before_relation(celonis: CelonisConnectionManager) -> ReturnGraph Returns: ReturnGraphType: A dictionary containing the formatted graph and table. 
""" - target_df: pd.DataFrame = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) - act_table = get_activities(celonis) # type: ignore - activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore - for i, pair in enumerate(activitiy_pairs): # type: ignore + target_df: pd.DataFrame = DataFrame( + columns=["Activity A", "Activity B", "# Occurrences"] + ) + act_table = get_activities(celonis) # type: ignore + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore query = { "A before B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", "B before A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", } - pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore - if (pair_df["B before A"] == 1).any() and not (pair_df["A before B"] == 1).any(): # type: ignore - target_df.loc[i] = [pair[1], pair[0], int((pair_df["B before A"] == 1).sum())] # type: ignore - elif (pair_df["A before B"] == 1).any() and not (pair_df["B before A"] == 1).any(): # type: ignore - target_df.loc[i] = [pair[0], pair[1], int((pair_df["A before B"] == 1).sum())] # type: ignore - output = format_graph_and_table(target_df) - return output + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B before A"] == 1).any() and not ( + pair_df["A before B"] == 1 + ).any(): # type: ignore + target_df.loc[i] = [ + pair[1], + pair[0], + int((pair_df["B before A"] == 1).sum()), + ] # type: ignore + elif (pair_df["A before B"] == 1).any() and not ( + pair_df["B before A"] == 1 + ).any(): # type: ignore + target_df.loc[i] = [ + pair[0], + pair[1], + int((pair_df["A before B"] == 1).sum()), + ] # type: ignore + output = format_graph_and_table(target_df) + return output # Always after @@ -107,18 +132,28 @@ def get_always_after_relation(celonis: CelonisConnectionManager) -> ReturnGraphT """ target_df = DataFrame(columns=["Activity A", "Activity B", "# Occurrences"]) act_table = get_activities(celonis) - activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore - for i, pair in enumerate(activitiy_pairs): # type: ignore + activitiy_pairs = list(combinations(act_table["Activity"].to_list(), 2)) # type: ignore + for i, pair in enumerate(activitiy_pairs): # type: ignore query = { "A after B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[1]}'] as src, NODE ['{pair[0]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", "B after A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{pair[0]}'] as src, NODE ['{pair[1]}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""", } - pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore - if (pair_df["B after A"] == 1).any() and not (pair_df["A after B"] == 1).any(): # type: ignore - target_df.loc[i] = [pair[1], pair[0], int((pair_df["B after A"] == 1).sum())] # type: ignore - elif (pair_df["A after B"] == 1).any() and not (pair_df["B after A"] == 1).any(): # type: ignore - target_df.loc[i] = [pair[0], pair[1], int((pair_df["A after B"] == 1).sum())] # type: ignore - output = format_graph_and_table(target_df) - return output \ No newline at end of file + pair_df = celonis.get_dataframe_from_celonis(query) # type: ignore + if (pair_df["B after A"] == 1).any() and not (pair_df["A after B"] == 
1).any(): # type: ignore + target_df.loc[i] = [ + pair[1], + pair[0], + int((pair_df["B after A"] == 1).sum()), + ] # type: ignore + elif (pair_df["A after B"] == 1).any() and not ( + pair_df["B after A"] == 1 + ).any(): # type: ignore + target_df.loc[i] = [ + pair[0], + pair[1], + int((pair_df["A after B"] == 1).sum()), + ] # type: ignore + output = format_graph_and_table(target_df) + return output diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index 3407c60..caaa27e 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -578,7 +578,7 @@ const ResultsPage = () => { min_confidence: parseFloat(minConfidence), }); if (zetaValue) { - queryParams.append("zeta", parseFloat(zetaValue)); + queryParams.append("fitness_score", parseFloat(zetaValue)); } const url = `${COMPUTE_DECLARATIVE_CONSTRAINTS}?${queryParams.toString()}`; const res = await fetch(url, { method: "GET" }); @@ -1056,7 +1056,7 @@ const ResultsPage = () => { fullWidth /> Date: Sun, 15 Jun 2025 21:46:36 +0200 Subject: [PATCH 16/18] fix --- backend/api/modules/declarative_router.py | 2 +- backend/pql_queries/declarative_queries.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/api/modules/declarative_router.py b/backend/api/modules/declarative_router.py index f11f9d5..7d1167a 100644 --- a/backend/api/modules/declarative_router.py +++ b/backend/api/modules/declarative_router.py @@ -1,7 +1,7 @@ """Contains the routes for handling declarative constraints.""" import uuid -from typing import Dict, List, TypeAlias, Union, Any +from typing import Dict, List, TypeAlias, Union from fastapi import APIRouter, BackgroundTasks, Depends, Request, Query diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py index a69219b..ac8f773 100644 --- a/backend/pql_queries/declarative_queries.py +++ b/backend/pql_queries/declarative_queries.py @@ -29,7 +29,6 @@ def format_graph_and_table(curr_df: pd.DataFrame) -> ReturnGraphType: Returns: ReturnGraphType: A dictionary containing the formatted graph and table. 
""" - output: ReturnGraphType = {"graphs": [], "tables": []} if not curr_df.empty: From 3a712c9ebfd10d968a5cc2ac0a0963ddbd41563d Mon Sep 17 00:00:00 2001 From: Ekansh Agarwal Date: Sun, 15 Jun 2025 21:57:48 +0200 Subject: [PATCH 17/18] updated frontend info options --- frontend/src/ResultsPage.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/frontend/src/ResultsPage.js b/frontend/src/ResultsPage.js index caaa27e..e32f2dc 100644 --- a/frontend/src/ResultsPage.js +++ b/frontend/src/ResultsPage.js @@ -149,19 +149,19 @@ const LOG_SKELETON_OPTIONS = [ // -------------------- Log Skeleton Descriptions -------------------- const logSkeletonDescriptions = { - "Get Equivalence": "Checks which activities always occur together in cases.", + "Get Equivalence": "Pairs of activities, where both activities occur equally often in every trace.", "Get Equivalence (PQL)": - "PQL-based variant for identifying equivalent activities.", - "Always Before": "Activity A always occurs before Activity B.", - "Always Before (PQL)": "PQL-based rule for 'always before' relationships.", - "Always After": "Activity A always occurs after Activity B.", - "Always After (PQL)": "PQL-based rule for 'always after' relationships.", - "Never Together": "Detects mutually exclusive activity pairs.", - "Never Together (PQL)": "PQL variant for mutual exclusivity.", - "Directly Follows": "Identifies direct succession between activities.", + "Pairs of activities, where both activities occur equally often in every trace using PQL queries.", + "Always Before": "Pairs of activities, where the first activity always occurs before the second one.", + "Always Before (PQL)": "Pairs of activities, where the first activity always occurs before the second one using PQL queries.", + "Always After": "Pairs of activities, where the second activity always occurs after the first one.", + "Always After (PQL)": "Pairs of activities, where the second activity always occurs after the first one using PQL queries.", + "Never Together": "Pairs of activities that do not occur together in any trace..", + "Never Together (PQL)": "Pairs of activities that do not occur together in any trace using PQL queries.", + "Directly Follows": "Pairs of activities, where the first activity can be followed by the second one.", "Activity Frequencies": "Counts how frequently each activity occurs.", "Directly Follows and Count (PQL)": - "Shows direct follow relationships with frequency (PQL).", + "Pairs of activities, where the first activity can be followed by the second one. 
The count of occurrences is also provided using PQL queries.",
 };

 // --------------------Declarative Constraints Options --------------------

From afb0bd0633196a272fd8eb507c364e085eed8986 Mon Sep 17 00:00:00 2001
From: Ekansh Agarwal
Date: Sun, 15 Jun 2025 22:05:06 +0200
Subject: [PATCH 18/18] fix the type formatting

---
 backend/pql_queries/declarative_queries.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/pql_queries/declarative_queries.py b/backend/pql_queries/declarative_queries.py
index ac8f773..483b2b6 100644
--- a/backend/pql_queries/declarative_queries.py
+++ b/backend/pql_queries/declarative_queries.py
@@ -1,7 +1,7 @@
 """Queries that can be used to get declarative constraint related data from celonis."""

 from itertools import combinations
-from typing import Dict, List, TypeAlias, Union
+from typing import Dict, List, TypeAlias, Union, Any

 from pandas import DataFrame
 import pandas as pd
@@ -13,8 +13,10 @@

 # **************** Type Aliases ****************

-TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]]
-GraphType: TypeAlias = Dict[str, List[Dict[str, str]]]
+# TableType: TypeAlias = Dict[str, Union[List[str], List[List[str]]]]
+# GraphType: TypeAlias = Dict[str, List[Dict[str, str]]]
+TableType: TypeAlias = Dict[str, Any]
+GraphType: TypeAlias = Dict[str, Any]
 ReturnGraphType: TypeAlias = Dict[str, Union[List[TableType], List[GraphType]]]

 # **************** Formatting Function ****************
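A note for reviewers on the pair-wise PQL check introduced in patches 14 and 15: for each ordered pair of activities, MATCH_PROCESS with CONNECTED BY EVENTUALLY marks every case in which the source activity is eventually followed by the target activity, and a relation is reported only when the match is one-directional. The following minimal sketch isolates that test for a single pair. It reuses the CelonisConnectionManager interface exactly as the patches call it; the function name and the two activity names are illustrative and are not part of the patch set.

    from backend.celonis_connection.celonis_connection_manager import (
        CelonisConnectionManager,
    )


    def always_after_holds(celonis: CelonisConnectionManager, a: str, b: str) -> bool:
        """Sketch only: True if activity `b` eventually follows `a` in at least
        one case while `a` never eventually follows `b` -- the one-directional
        test applied per pair inside get_always_after_relation."""
        query = {
            "A then B": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{a}'] as src,
            NODE ['{b}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
            "B then A": f"""MATCH_PROCESS ("ACTIVITIES"."concept:name", NODE ['{b}'] as src,
            NODE ['{a}'] as tgt CONNECTED BY EVENTUALLY [src , tgt])""",
        }
        # One row per case; a column value of 1 means the pattern matched in that case.
        pair_df = celonis.get_dataframe_from_celonis(query)
        return bool((pair_df["A then B"] == 1).any()) and not bool(
            (pair_df["B then A"] == 1).any()
        )

Since each pair costs one PQL round trip, the scan in get_always_after_relation and get_always_before_relation grows quadratically with the number of distinct activities; folding several pairs into a single query would be a natural follow-up if the activity alphabet is large.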