Spaces:
Sleeping
Sleeping
File size: 5,828 Bytes
2d6c179 dcedb7e 2d6c179 dcedb7e 2d6c179 dcedb7e 2d6c179 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | """
VynFi Streamlit Template — Generate, Explore, Visualize
"""
import os
import streamlit as st
import pandas as pd
import vynfi
# --- Page setup and authentication ------------------------------------------
# NOTE(review): the "π" and "β" glyphs in the strings below look like
# mis-decoded characters (probably an emoji and an arrow) — confirm the
# intended glyphs against the upstream file before changing them.
st.set_page_config(page_title="VynFi Explorer", page_icon="π", layout="wide")
st.title("π VynFi Data Explorer")

# Prefer the key from the environment; the sidebar prompt is only rendered
# when VYNFI_API_KEY is unset or empty (short-circuit `or`).
api_key = os.getenv("VYNFI_API_KEY", "") or st.sidebar.text_input(
    "VynFi API Key",
    type="password",
    placeholder="vf_live_...",
)

# Nothing below can run without a key: show a hint and end this script run.
if not api_key:
    st.info("Enter your VynFi API key in the sidebar to get started. [Get a free key β](https://vynfi.com/signup)")
    st.stop()

client = vynfi.VynFi(api_key=api_key)
# --- Sidebar: generation parameters -----------------------------------------
# Sectors offered in the drop-down; index=1 below pre-selects "manufacturing".
SECTOR_CHOICES = [
    "retail",
    "manufacturing",
    "financial_services",
    "banking_aml",
    "healthcare",
    "technology",
    "energy",
]

# Every widget created inside this context is rendered in the sidebar.
with st.sidebar:
    st.header("Generate")
    sector = st.selectbox("Sector", SECTOR_CHOICES, index=1)
    rows = st.slider("Rows", min_value=100, max_value=100_000, value=1000, step=100)
    companies = st.slider("Companies", min_value=1, max_value=20, value=3)
    fraud_rate = st.slider("Fraud rate", min_value=0.0, max_value=0.20, value=0.03, step=0.01)

    st.divider()

    # Optional free-text description; when non-blank it takes precedence over
    # the sliders above in the generate handler.
    nl_description = st.text_area(
        "Or describe what you want (Scale+)",
        placeholder="e.g. 6 months of P2P for a German manufacturer with IFRS",
        height=80,
    )
    generate = st.button("Generate", type="primary", use_container_width=True)
# --- Generation --------------------------------------------------------------
# Runs only on the rerun triggered by the "Generate" button. On success the job
# id and the downloaded archive are stored in st.session_state.
if generate:
    with st.spinner("Generating..."):
        try:
            description = nl_description.strip()
            if not description:
                # Manual path: build the config from the sidebar widgets.
                config = {
                    "sector": sector,
                    "rows": rows,
                    "companies": companies,
                    "fraudRate": fraud_rate,
                    "complexity": "medium",
                    "exportFormat": "json",
                    "output": {"numericMode": "native"},
                }
            else:
                # Free-text path: ask the API to derive a config from the text.
                # NOTE(review): `_request` is an underscore-prefixed client
                # method — check whether the SDK exposes a public equivalent.
                response = client._request(
                    "POST",
                    "/v1/configs/from-description",
                    json={"description": description},
                )
                config = response.get("config", {})
                st.sidebar.success(f"AI config: {config.get('sector')} / {config.get('rows')} rows")

            job = client.jobs.generate_config(config=config)
            # Poll every 3 seconds, giving up after 300 seconds.
            completed = client.jobs.wait(job.id, poll_interval=3.0, timeout=300.0)

            if completed.status != "completed":
                st.error(f"Job failed: {completed.error_detail}")
                st.stop()

            st.session_state["job_id"] = completed.id
            st.session_state["archive"] = client.jobs.download_archive(completed.id)
            st.success(f"Job {completed.id} completed")
        except Exception as exc:
            # UI boundary: surface any failure to the user instead of a traceback.
            st.error(f"Error: {exc}")
# --- Results -----------------------------------------------------------------
# Renders the archive stored in st.session_state by a successful generation;
# when no archive is stored yet, only a hint is shown.

# Upper bound on journal entries flattened into the preview table.
MAX_PREVIEW_ENTRIES = 500

# Column order of the flattened journal table. Passing it explicitly to
# pd.DataFrame keeps the columns present even when there are zero rows, so the
# metric code below does not fail with a KeyError on an empty dataset.
JOURNAL_COLUMNS = [
    "document_id",
    "company_code",
    "posting_date",
    "document_type",
    "is_fraud",
    "gl_account",
    "debit_amount",
    "credit_amount",
]


def _flatten_journal_entries(entries, limit=MAX_PREVIEW_ENTRIES):
    """Return one flat dict per journal line for the first *limit* entries.

    An entry may carry nested ``header`` and ``lines`` keys; when either key is
    missing, the entry itself is used as the header and/or as its only line.
    Missing fields fall back to ``""`` (text), ``False`` (``is_fraud``) or ``0``
    (amounts).
    """
    flat = []
    for entry in entries[:limit]:
        header = entry.get("header", entry)
        for line in entry.get("lines", [entry]):
            flat.append({
                "document_id": header.get("document_id", ""),
                "company_code": header.get("company_code", ""),
                "posting_date": header.get("posting_date", ""),
                "document_type": header.get("document_type", ""),
                "is_fraud": header.get("is_fraud", False),
                "gl_account": line.get("gl_account", ""),
                "debit_amount": line.get("debit_amount", 0),
                "credit_amount": line.get("credit_amount", 0),
            })
    return flat


if "archive" in st.session_state:
    archive = st.session_state["archive"]
    journal_tab, documents_tab, quality_tab, files_tab = st.tabs(
        ["Journal Entries", "Documents", "Quality", "Files"]
    )

    with journal_tab:
        st.subheader("Journal Entries")
        try:
            entries = archive.json("journal_entries.json")
            df = pd.DataFrame(_flatten_journal_entries(entries), columns=JOURNAL_COLUMNS)
            # Convert amounts to numeric (handles both string and native);
            # unparseable values become 0.
            for col in ["debit_amount", "credit_amount"]:
                df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

            # Make the preview cap visible so the metrics are not mistaken for
            # totals over the whole dataset.
            if len(entries) > MAX_PREVIEW_ENTRIES:
                st.caption(
                    f"Showing the first {MAX_PREVIEW_ENTRIES:,} of {len(entries):,} journal entries."
                )

            st.metric("Line items", f"{len(df):,}")
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Total debits", f"${df['debit_amount'].sum():,.2f}")
            with col2:
                # is_fraud comes from the entry header and is repeated on every
                # line, so this counts line items, not distinct entries.
                fraud_count = int(df["is_fraud"].sum())
                pct = fraud_count / len(df) * 100 if len(df) > 0 else 0
                st.metric("Fraud entries", f"{fraud_count} ({pct:.1f}%)")
            st.dataframe(df, use_container_width=True, hide_index=True)
        except Exception as e:
            st.warning(f"Could not load journal entries: {e}")

    with documents_tab:
        st.subheader("Document Flows")
        found_any = False
        for doc_type in ["purchase_orders", "goods_receipts", "vendor_invoices", "payments"]:
            try:
                docs = archive.json(f"document_flows/{doc_type}.json")
                st.write(f"**{doc_type.replace('_', ' ').title()}**: {len(docs)} records")
                found_any = True
            except Exception:
                # Best effort: a flow file that cannot be loaded is skipped.
                continue
        if not found_any:
            # Avoid a blank tab when none of the flow files could be loaded.
            st.info("No document flow files found in this archive.")

    with quality_tab:
        st.subheader("Quality Metrics")
        try:
            analytics = client.jobs.analytics(st.session_state["job_id"])
            b = getattr(analytics, "benford_analysis", None)
            if b:
                col1, col2, col3 = st.columns(3)
                col1.metric("Benford MAD", f"{b.mad:.4f}")
                col2.metric("Chi-squared", f"{b.chi_squared:.2f}")
                # NOTE(review): these two glyphs were mis-encoded in the
                # source (the literal was split across lines); restored as
                # check / cross marks — confirm against the upstream file.
                col3.metric("Conforms", "✅" if b.passes else "❌")
            else:
                # Analytics returned without a Benford section: say so rather
                # than leaving the tab empty.
                st.info("Quality analytics not available for this job.")
        except Exception:
            st.info("Quality analytics not available for this job.")

    with files_tab:
        st.subheader("Archive Files")
        for member in archive.files():
            st.text(member)
else:
    st.info("Click **Generate** in the sidebar to create a dataset.")
|