""" VynFi ร— pm4py: Interactive Process Mining Demo Deployed as a HuggingFace Space (Streamlit SDK). Embeddable on vynfi.com via iframe for the /process-mining-data pillar page. Loads the VynFi Supply Chain OCEL dataset from HF, runs pm4py process discovery + variant analysis, and renders interactive visualizations. """ import streamlit as st import pandas as pd import pm4py from pm4py.objects.conversion.log import converter as log_converter from collections import Counter import io st.set_page_config(page_title="VynFi Process Mining", page_icon="๐Ÿ“Š", layout="wide") st.title("๐Ÿ“Š VynFi ร— pm4py: Process Mining Demo") st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com) โ€” explore interactively") @st.cache_data def load_data(): """Load the VynFi OCEL dataset from HuggingFace.""" from datasets import load_dataset ds = load_dataset("VynFi/vynfi-supply-chain-ocel", "events", split="train") df = ds.to_pandas() if "timestamp" in df.columns: df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce") # Rename for pm4py rename = {} if "case_id" in df.columns: rename["case_id"] = "case:concept:name" if "activity_name" in df.columns: rename["activity_name"] = "concept:name" elif "activity" in df.columns: rename["activity"] = "concept:name" if "timestamp" in df.columns: rename["timestamp"] = "time:timestamp" df = df.rename(columns=rename) return df df = load_data() st.sidebar.header("Dataset") st.sidebar.metric("Events", f"{len(df):,}") st.sidebar.metric("Activities", df["concept:name"].nunique() if "concept:name" in df.columns else "?") st.sidebar.metric("Cases", df["case:concept:name"].nunique() if "case:concept:name" in df.columns else "?") tab1, tab2, tab3, tab4 = st.tabs(["Process Model", "Variants", "Statistics", "Raw Data"]) with tab1: st.subheader("Directly-Follows Graph") try: event_log = pm4py.convert_to_event_log(df) dfg, start_activities, end_activities = pm4py.discover_dfg(event_log) # Render DFG as dot โ†’ SVG from pm4py.visualization.dfg import visualizer as dfg_visualizer gviz = dfg_visualizer.apply( dfg, log=event_log, variant=dfg_visualizer.Variants.FREQUENCY, parameters={ dfg_visualizer.Variants.FREQUENCY.value.Parameters.START_ACTIVITIES: start_activities, dfg_visualizer.Variants.FREQUENCY.value.Parameters.END_ACTIVITIES: end_activities, dfg_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "svg", }, ) svg = dfg_visualizer.serialize(gviz).decode("utf-8") st.image(svg, use_container_width=True) except Exception as e: st.warning(f"Could not render DFG: {e}") st.info("Try the Variants or Statistics tabs instead.") with tab2: st.subheader("Process Variants") if "case:concept:name" in df.columns and "concept:name" in df.columns: variants = {} for case_id, group in df.sort_values("time:timestamp").groupby("case:concept:name"): trace = tuple(group["concept:name"].tolist()) variants[case_id] = trace variant_counts = Counter(variants.values()) total = len(variants) st.metric("Unique Variants", len(variant_counts)) rows = [] for trace, count in variant_counts.most_common(20): rows.append({ "Trace": " โ†’ ".join(trace), "Count": count, "Frequency": f"{count / total * 100:.1f}%", }) st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True) # Happy path if variant_counts: happy_path = variant_counts.most_common(1)[0] st.info( f"**Happy path**: {' โ†’ '.join(happy_path[0])} " f"({happy_path[1]} cases, {happy_path[1] / total * 100:.1f}%)" ) with tab3: st.subheader("Activity Statistics") if "concept:name" in df.columns: act_counts = df["concept:name"].value_counts() st.bar_chart(act_counts) col1, col2 = st.columns(2) with col1: st.metric("Most frequent", act_counts.index[0]) st.metric("Count", f"{act_counts.iloc[0]:,}") with col2: st.metric("Least frequent", act_counts.index[-1]) st.metric("Count", f"{act_counts.iloc[-1]:,}") if "time:timestamp" in df.columns: st.subheader("Events Over Time") daily = df.set_index("time:timestamp").resample("W").size() st.line_chart(daily) with tab4: st.subheader("Raw Event Data") st.dataframe(df.head(100), use_container_width=True) st.divider() st.caption( "Data: [VynFi/vynfi-supply-chain-ocel](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel) ยท " "Engine: [pm4py](https://pm4py.fit.fraunhofer.de/) ยท " "Platform: [vynfi.com](https://vynfi.com)" )