ninarg committed on
Commit
3c7d587
·
verified ·
1 Parent(s): d7bc2f6

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VynFi × pm4py: Interactive Process Mining Demo
3
+
4
+ Deployed as a HuggingFace Space (Streamlit SDK). Embeddable on vynfi.com
5
+ via iframe for the /process-mining-data pillar page.
6
+
7
+ Loads the VynFi Supply Chain OCEL dataset from HF, runs pm4py process
8
+ discovery + variant analysis, and renders interactive visualizations.
9
+ """
10
+
11
+ import streamlit as st
12
+ import pandas as pd
13
+ import pm4py
14
+ from pm4py.objects.conversion.log import converter as log_converter
15
+ from collections import Counter
16
+ import io
17
+
18
# Page chrome: browser-tab metadata, then the visible heading and tagline.
# set_page_config is kept as the first Streamlit call in the script.
# The non-ASCII characters below (chart emoji, multiplication sign, em dash)
# had been mis-decoded into mojibake; they are restored to the intended glyphs.
st.set_page_config(page_title="VynFi Process Mining", page_icon="📊", layout="wide")

st.title("📊 VynFi × pm4py: Process Mining Demo")
st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com) — explore interactively")
22
+
23
+
24
@st.cache_data
def load_data():
    """Fetch the VynFi supply-chain events table and relabel it for pm4py.

    Loads the ``events`` configuration (``train`` split) of
    ``VynFi/vynfi-supply-chain-ocel`` and converts it to a DataFrame.  If a
    ``timestamp`` column exists it is parsed with ``pd.to_datetime``;
    values that cannot be parsed become ``NaT``.  Whichever of the case,
    activity and timestamp columns are present are then renamed to
    ``case:concept:name``, ``concept:name`` and ``time:timestamp``.
    ``activity_name`` takes precedence over ``activity`` when both exist.

    Returns:
        The renamed events DataFrame.
    """
    # Function-local import: ``datasets`` is only needed by this loader.
    from datasets import load_dataset

    events = load_dataset(
        "VynFi/vynfi-supply-chain-ocel", "events", split="train"
    ).to_pandas()

    has_timestamp = "timestamp" in events.columns
    if has_timestamp:
        events["timestamp"] = pd.to_datetime(events["timestamp"], errors="coerce")

    # Build the source-name -> pm4py-name mapping from the columns that exist.
    column_map = {}
    if "case_id" in events.columns:
        column_map["case_id"] = "case:concept:name"

    activity_source = next(
        (name for name in ("activity_name", "activity") if name in events.columns),
        None,
    )
    if activity_source is not None:
        column_map[activity_source] = "concept:name"

    if has_timestamp:
        column_map["timestamp"] = "time:timestamp"

    return events.rename(columns=column_map)
46
+
47
+
48
df = load_data()


def _unique_or_unknown(column):
    """Return the number of distinct values in ``df[column]``, or "?" if the column is absent."""
    if column in df.columns:
        return df[column].nunique()
    return "?"


# Sidebar summary of the loaded event table.
st.sidebar.header("Dataset")
st.sidebar.metric("Events", f"{len(df):,}")
st.sidebar.metric("Activities", _unique_or_unknown("concept:name"))
st.sidebar.metric("Cases", _unique_or_unknown("case:concept:name"))

# One tab per view; the names tab1..tab4 are used by the sections below.
tab_labels = ["Process Model", "Variants", "Statistics", "Raw Data"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
56
+
57
with tab1:
    st.subheader("Directly-Follows Graph")
    try:
        # Discover the directly-follows graph plus its start/end activities.
        event_log = pm4py.convert_to_event_log(df)
        dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)

        from pm4py.visualization.dfg import visualizer as dfg_visualizer

        # Alias the frequency variant and its parameter enum to keep the
        # render call readable.
        frequency_variant = dfg_visualizer.Variants.FREQUENCY
        options = frequency_variant.value.Parameters
        render_parameters = {
            options.START_ACTIVITIES: start_activities,
            options.END_ACTIVITIES: end_activities,
            options.FORMAT: "svg",
        }

        gviz = dfg_visualizer.apply(
            dfg,
            log=event_log,
            variant=frequency_variant,
            parameters=render_parameters,
        )

        # Serialize the rendered graph to SVG text and display it.
        svg_markup = dfg_visualizer.serialize(gviz).decode("utf-8")
        st.image(svg_markup, use_container_width=True)
    except Exception as err:
        # Best-effort view: any failure is reported in the tab instead of
        # stopping the app, and the user is pointed at the other tabs.
        st.warning(f"Could not render DFG: {err}")
        st.info("Try the Variants or Statistics tabs instead.")
81
+
82
with tab2:
    st.subheader("Process Variants")
    if "case:concept:name" in df.columns and "concept:name" in df.columns:
        # Order events chronologically when a timestamp column exists;
        # otherwise fall back to the dataset's row order instead of raising
        # KeyError.  mergesort is stable, so events sharing a timestamp keep
        # their original relative order and the derived traces are
        # reproducible between runs.
        if "time:timestamp" in df.columns:
            ordered = df.sort_values("time:timestamp", kind="mergesort")
        else:
            ordered = df

        # One trace (tuple of activity names, in order) per case.
        variants = {
            case_id: tuple(group["concept:name"].tolist())
            for case_id, group in ordered.groupby("case:concept:name")
        }

        variant_counts = Counter(variants.values())
        total = len(variants)

        st.metric("Unique Variants", len(variant_counts))

        # Table of the 20 most common variants with their share of all cases.
        rows = [
            {
                "Trace": " → ".join(trace),
                "Count": count,
                "Frequency": f"{count / total * 100:.1f}%",
            }
            for trace, count in variant_counts.most_common(20)
        ]
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)

        # Happy path: the single most frequent variant (skipped when there
        # are no cases at all, which also avoids dividing by zero).
        if variant_counts:
            happy_trace, happy_count = variant_counts.most_common(1)[0]
            st.info(
                f"**Happy path**: {' → '.join(happy_trace)} "
                f"({happy_count} cases, {happy_count / total * 100:.1f}%)"
            )
111
+
112
with tab3:
    st.subheader("Activity Statistics")
    if "concept:name" in df.columns:
        # value_counts() sorts by count, descending, so the first and last
        # entries are the most and least frequent activities.
        act_counts = df["concept:name"].value_counts()
        st.bar_chart(act_counts)

        # Indexing [0] / [-1] raises IndexError on an empty series, so the
        # extremes are only shown when at least one activity exists.
        if not act_counts.empty:
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Most frequent", act_counts.index[0])
                st.metric("Count", f"{act_counts.iloc[0]:,}")
            with col2:
                st.metric("Least frequent", act_counts.index[-1])
                st.metric("Count", f"{act_counts.iloc[-1]:,}")

    if "time:timestamp" in df.columns:
        st.subheader("Events Over Time")
        # Rows whose timestamp is missing (NaT) carry no date and are left
        # out of the time series.  "W" buckets the events per week.
        timed_events = df.dropna(subset=["time:timestamp"])
        weekly_counts = timed_events.set_index("time:timestamp").resample("W").size()
        st.line_chart(weekly_counts)
130
+
131
with tab4:
    st.subheader("Raw Event Data")
    # Show only the first 100 rows of the event table.
    preview = df.head(100)
    st.dataframe(preview, use_container_width=True)
134
+
135
# Footer: attribution links for the dataset, the mining engine and the
# platform.  The "·" separators had been mis-decoded into mojibake and are
# restored here.
st.divider()
st.caption(
    "Data: [VynFi/vynfi-supply-chain-ocel](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel) · "
    "Engine: [pm4py](https://pm4py.fit.fraunhofer.de/) · "
    "Platform: [vynfi.com](https://vynfi.com)"
)