ninarg's picture
Add app.py
3c7d587 verified
raw
history blame
5.04 kB
"""
VynFi × pm4py: Interactive Process Mining Demo
Deployed as a HuggingFace Space (Streamlit SDK). Embeddable on vynfi.com
via iframe for the /process-mining-data pillar page.
Loads the VynFi Supply Chain OCEL dataset from HF, runs pm4py process
discovery + variant analysis, and renders interactive visualizations.
"""
import streamlit as st
import pandas as pd
import pm4py
from pm4py.objects.conversion.log import converter as log_converter
from collections import Counter
import io
# Page chrome. set_page_config is kept as the first Streamlit call in the script.
# The emoji, multiplication sign and em dash below replace mis-decoded UTF-8
# sequences (mojibake) that were being shown to users verbatim.
st.set_page_config(page_title="VynFi Process Mining", page_icon="📊", layout="wide")
st.title("📊 VynFi × pm4py: Process Mining Demo")
st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com) — explore interactively")
@st.cache_data
def load_data():
    """Fetch the VynFi OCEL events from HuggingFace as a pm4py-style DataFrame.

    Loads the "events" config (train split) of ``VynFi/vynfi-supply-chain-ocel``,
    parses the ``timestamp`` column (unparseable values become NaT) and renames
    whichever of the source columns are present to the pm4py column names:

    * ``case_id`` -> ``case:concept:name``
    * ``activity_name`` (or, failing that, ``activity``) -> ``concept:name``
    * ``timestamp`` -> ``time:timestamp``

    Returns:
        The events as a pandas DataFrame; columns that are absent in the
        source are simply not renamed.
    """
    from datasets import load_dataset

    events = load_dataset(
        "VynFi/vynfi-supply-chain-ocel", "events", split="train"
    ).to_pandas()
    present = events.columns

    if "timestamp" in present:
        events["timestamp"] = pd.to_datetime(events["timestamp"], errors="coerce")

    # "activity_name" wins over "activity" when both exist.
    activity_source = "activity_name" if "activity_name" in present else "activity"
    wanted = {
        "case_id": "case:concept:name",
        activity_source: "concept:name",
        "timestamp": "time:timestamp",
    }
    # Only rename the source columns that actually exist.
    pm4py_names = {old: new for old, new in wanted.items() if old in present}
    return events.rename(columns=pm4py_names)
# Load the event table once; every tab below reads from this frame.
df = load_data()

# Sidebar summary: total events plus distinct activities and cases.
st.sidebar.header("Dataset")
st.sidebar.metric("Events", f"{len(df):,}")
for _label, _column in (("Activities", "concept:name"), ("Cases", "case:concept:name")):
    # Show "?" when the expected pm4py column is missing from the dataset.
    _distinct = df[_column].nunique() if _column in df.columns else "?"
    st.sidebar.metric(_label, _distinct)

_TAB_TITLES = ["Process Model", "Variants", "Statistics", "Raw Data"]
tab1, tab2, tab3, tab4 = st.tabs(_TAB_TITLES)
# Tab 1: discover a directly-follows graph with pm4py and show it as an SVG.
with tab1:
    st.subheader("Directly-Follows Graph")
    try:
        event_log = pm4py.convert_to_event_log(df)
        dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)

        # Render the DFG through graphviz (dot) to SVG.
        from pm4py.visualization.dfg import visualizer as dfg_visualizer

        frequency_variant = dfg_visualizer.Variants.FREQUENCY
        param_keys = frequency_variant.value.Parameters
        render_options = {
            param_keys.START_ACTIVITIES: start_activities,
            param_keys.END_ACTIVITIES: end_activities,
            param_keys.FORMAT: "svg",
        }
        gviz = dfg_visualizer.apply(
            dfg,
            log=event_log,
            variant=frequency_variant,
            parameters=render_options,
        )
        svg = dfg_visualizer.serialize(gviz).decode("utf-8")
        st.image(svg, use_container_width=True)
    except Exception as exc:
        # Any failure in conversion, discovery or rendering degrades to a
        # warning so the remaining tabs stay usable.
        st.warning(f"Could not render DFG: {exc}")
        st.info("Try the Variants or Statistics tabs instead.")
def _count_variants(events):
    """Count how many cases follow each distinct activity sequence.

    Args:
        events: DataFrame with ``case:concept:name`` and ``concept:name``
            columns; ``time:timestamp`` is used for ordering when present.

    Returns:
        A ``Counter`` mapping each trace (tuple of activity labels in event
        order) to the number of cases that produced it.
    """
    if "time:timestamp" in events.columns:
        # A stable sort keeps the incoming row order for events that share a
        # timestamp, so traces do not change between runs.
        events = events.sort_values("time:timestamp", kind="stable")
    # Without a timestamp column the existing row order is used as-is.
    activities_by_case = events.groupby("case:concept:name")["concept:name"]
    return Counter(tuple(names.tolist()) for _, names in activities_by_case)


# Tab 2: the 20 most common variants plus a call-out for the most frequent one.
with tab2:
    st.subheader("Process Variants")
    if "case:concept:name" in df.columns and "concept:name" in df.columns:
        variant_counts = _count_variants(df)
        # One trace per case, so the counts sum to the number of cases.
        total = sum(variant_counts.values())
        st.metric("Unique Variants", len(variant_counts))

        rows = [
            {
                # str() guards against non-string activity labels (e.g. NaN).
                "Trace": " → ".join(map(str, trace)),
                "Count": count,
                "Frequency": f"{count / total * 100:.1f}%",
            }
            for trace, count in variant_counts.most_common(20)
        ]
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)

        # Happy path: the single most frequent variant (skipped when there are no cases).
        if variant_counts:
            happy_trace, happy_count = variant_counts.most_common(1)[0]
            st.info(
                f"**Happy path**: {' → '.join(map(str, happy_trace))} "
                f"({happy_count} cases, {happy_count / total * 100:.1f}%)"
            )
# Tab 3: per-activity event counts and a weekly event-volume timeline.
# NOTE(review): the source's indentation was lost; the timeline section is
# treated here as independent of the activity column — confirm against the
# deployed app.
with tab3:
    st.subheader("Activity Statistics")
    if "concept:name" in df.columns:
        # value_counts sorts descending, so the first/last entries are the
        # most/least frequent activities.
        act_counts = df["concept:name"].value_counts()
        if act_counts.empty:
            # Indexing [0] / [-1] below would raise IndexError on an empty result.
            st.info("No activity data available.")
        else:
            st.bar_chart(act_counts)
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Most frequent", act_counts.index[0])
                st.metric("Count", f"{act_counts.iloc[0]:,}")
            with col2:
                st.metric("Least frequent", act_counts.index[-1])
                st.metric("Count", f"{act_counts.iloc[-1]:,}")

    if "time:timestamp" in df.columns:
        st.subheader("Events Over Time")
        # Rows with a missing timestamp cannot be placed on the timeline.
        timed_events = df.dropna(subset=["time:timestamp"])
        # "W" buckets events by week.
        weekly = timed_events.set_index("time:timestamp").resample("W").size()
        st.line_chart(weekly)
# Tab 4: a preview of the underlying event table (first 100 rows only).
with tab4:
    st.subheader("Raw Event Data")
    preview = df.head(100)
    st.dataframe(preview, use_container_width=True)
# Footer with attribution links. The middle-dot separators replace
# mis-decoded UTF-8 sequences (mojibake) that were being shown verbatim.
st.divider()
st.caption(
    "Data: [VynFi/vynfi-supply-chain-ocel](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel) · "
    "Engine: [pm4py](https://pm4py.fit.fraunhofer.de/) · "
    "Platform: [vynfi.com](https://vynfi.com)"
)