Spaces:
Sleeping
Sleeping
| """ | |
| VynFi × pm4py: Interactive Process Mining Demo | |
| Deployed as a HuggingFace Space (Streamlit SDK). Embeddable on vynfi.com | |
| via iframe for the /process-mining-data pillar page. | |
| Loads the VynFi Supply Chain OCEL dataset from HF, runs pm4py process | |
| discovery + variant analysis, and renders interactive visualizations. | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import pm4py | |
| from pm4py.objects.conversion.log import converter as log_converter | |
| from collections import Counter | |
| import io | |
# Page chrome: browser-tab title/icon, wide layout, then the visible heading
# and tagline.
# The "×" and "—" below repair characters that were mis-decoded in this copy
# of the file (they appeared as "Γ" and "β").
# NOTE(review): "π" also looks like a mis-decoded emoji; the original glyph is
# not recoverable from here, so it is left untouched — confirm and restore it.
st.set_page_config(page_title="VynFi Process Mining", page_icon="π", layout="wide")
st.title("π VynFi × pm4py: Process Mining Demo")
st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com) — explore interactively")
def load_data():
    """Fetch the VynFi supply-chain events table and relabel it for pm4py.

    Downloads the ``"events"`` configuration (``train`` split) of the
    ``VynFi/vynfi-supply-chain-ocel`` dataset and converts it to a pandas
    DataFrame. When the corresponding source columns are present:

    * ``timestamp`` is parsed with ``pd.to_datetime``; values that cannot be
      parsed become ``NaT`` (``errors="coerce"``).
    * ``case_id`` is renamed to ``case:concept:name``.
    * ``activity_name`` (or, failing that, ``activity``) is renamed to
      ``concept:name``.
    * ``timestamp`` is renamed to ``time:timestamp``.

    Columns that are absent are simply skipped, so callers must check for the
    renamed columns before using them.

    Returns:
        The events DataFrame with the renamed columns.
    """
    # Imported lazily, inside the function, exactly as the loader needs it.
    from datasets import load_dataset

    events = load_dataset(
        "VynFi/vynfi-supply-chain-ocel", "events", split="train"
    ).to_pandas()

    has_timestamp = "timestamp" in events.columns
    if has_timestamp:
        events["timestamp"] = pd.to_datetime(events["timestamp"], errors="coerce")

    # Source column -> column name pm4py expects.
    pm4py_names = {}
    if "case_id" in events.columns:
        pm4py_names["case_id"] = "case:concept:name"

    # "activity_name" takes precedence; "activity" is only a fallback.
    if "activity_name" in events.columns:
        pm4py_names["activity_name"] = "concept:name"
    elif "activity" in events.columns:
        pm4py_names["activity"] = "concept:name"

    if has_timestamp:
        pm4py_names["timestamp"] = "time:timestamp"

    return events.rename(columns=pm4py_names)
# Streamlit re-executes this whole script on every user interaction. Calling
# load_data() directly would fetch and convert the dataset again on each
# rerun, so route it through st.cache_data: the first run loads the data and
# later reruns reuse the cached DataFrame.
df = st.cache_data(load_data)()

# Sidebar summary. The renamed pm4py columns only exist when the source
# dataset had the matching columns, hence the "?" fallbacks.
st.sidebar.header("Dataset")
st.sidebar.metric("Events", f"{len(df):,}")
st.sidebar.metric(
    "Activities",
    df["concept:name"].nunique() if "concept:name" in df.columns else "?",
)
st.sidebar.metric(
    "Cases",
    df["case:concept:name"].nunique() if "case:concept:name" in df.columns else "?",
)

# One tab per view; each is filled in by its own `with tabN:` section below.
tab1, tab2, tab3, tab4 = st.tabs(["Process Model", "Variants", "Statistics", "Raw Data"])
with tab1:
    st.subheader("Directly-Follows Graph")
    # Everything from log conversion to rendering sits in one try block so
    # that any failure is shown as a warning in this tab instead of stopping
    # the app; the other tabs do not depend on this one.
    try:
        log = pm4py.convert_to_event_log(df)
        graph, starts, ends = pm4py.discover_dfg(log)

        # Render the DFG with the frequency variant, asking for SVG output.
        from pm4py.visualization.dfg import visualizer as dfg_visualizer

        frequency = dfg_visualizer.Variants.FREQUENCY
        option = frequency.value.Parameters
        render_options = {
            option.START_ACTIVITIES: starts,
            option.END_ACTIVITIES: ends,
            option.FORMAT: "svg",
        }
        gviz = dfg_visualizer.apply(
            graph,
            log=log,
            variant=frequency,
            parameters=render_options,
        )

        # serialize() yields bytes; decode them to the SVG text that is
        # handed to st.image.
        svg_markup = dfg_visualizer.serialize(gviz).decode("utf-8")
        st.image(svg_markup, use_container_width=True)
    except Exception as err:
        st.warning(f"Could not render DFG: {err}")
        st.info("Try the Variants or Statistics tabs instead.")
with tab2:
    st.subheader("Process Variants")
    if "case:concept:name" in df.columns and "concept:name" in df.columns:
        # Order events chronologically when a timestamp column exists. The
        # sort is stable so that events sharing a timestamp keep their
        # dataset order instead of being shuffled arbitrarily, which would
        # make the derived traces non-deterministic. Without a timestamp
        # column, fall back to dataset order rather than raising KeyError.
        if "time:timestamp" in df.columns:
            ordered = df.sort_values("time:timestamp", kind="stable")
        else:
            ordered = df

        # One trace (tuple of activity names) per case.
        traces = {
            case_id: tuple(events["concept:name"].tolist())
            for case_id, events in ordered.groupby("case:concept:name")
        }
        variant_counts = Counter(traces.values())
        total = len(traces)

        st.metric("Unique Variants", len(variant_counts))

        # Top 20 variants. The "→" separator repairs a mis-decoded character
        # (it appeared as "β" in this copy of the file).
        rows = [
            {
                "Trace": " → ".join(trace),
                "Count": count,
                "Frequency": f"{count / total * 100:.1f}%",
            }
            for trace, count in variant_counts.most_common(20)
        ]
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)

        # Happy path: the single most frequent variant. Skipped when there
        # are no cases, which also keeps `total` from being zero here.
        if variant_counts:
            happy_trace, happy_count = variant_counts.most_common(1)[0]
            st.info(
                f"**Happy path**: {' → '.join(happy_trace)} "
                f"({happy_count} cases, {happy_count / total * 100:.1f}%)"
            )
with tab3:
    st.subheader("Activity Statistics")
    if "concept:name" in df.columns:
        # value_counts() sorts by descending frequency, so the first entry is
        # the most frequent activity and the last one the least frequent.
        act_counts = df["concept:name"].value_counts()
        st.bar_chart(act_counts)

        # With no activities at all, index[0] / index[-1] would raise
        # IndexError; only show the extremes when there is something to show.
        if not act_counts.empty:
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Most frequent", act_counts.index[0])
                st.metric("Count", f"{act_counts.iloc[0]:,}")
            with col2:
                st.metric("Least frequent", act_counts.index[-1])
                st.metric("Count", f"{act_counts.iloc[-1]:,}")

    if "time:timestamp" in df.columns:
        st.subheader("Events Over Time")
        # Timestamps that failed to parse are NaT and cannot be assigned to a
        # week; drop them explicitly instead of relying on how resample()
        # treats NaT index values.
        timed = df.dropna(subset=["time:timestamp"])
        # "W" buckets are weekly event counts.
        weekly = timed.set_index("time:timestamp").resample("W").size()
        st.line_chart(weekly)
with tab4:
    st.subheader("Raw Event Data")
    # Show only the first 100 rows of the event table.
    preview = df.head(100)
    st.dataframe(preview, use_container_width=True)
# Footer with attribution links. The "·" separators repair a mis-decoded
# character sequence (they appeared as "Β·" in this copy of the file).
st.divider()
st.caption(
    "Data: [VynFi/vynfi-supply-chain-ocel](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel) · "
    "Engine: [pm4py](https://pm4py.fit.fraunhofer.de/) · "
    "Platform: [vynfi.com](https://vynfi.com)"
)