File size: 6,305 Bytes
96f0aad
52ac874
65ce454
 
48ac593
c7cc5ed
65ce454
 
 
 
 
c7cc5ed
50ba7ba
48ac593
50ba7ba
48ac593
c7cc5ed
 
 
4e778f2
 
c7cc5ed
 
50ba7ba
 
c7cc5ed
50ba7ba
c7cc5ed
50ba7ba
 
0fa3514
4e778f2
0fa3514
 
50ba7ba
 
 
 
0fa3514
c7cc5ed
 
 
 
 
48ac593
 
50ba7ba
c7cc5ed
 
 
 
 
4e778f2
50ba7ba
 
 
4e778f2
50ba7ba
c7cc5ed
 
 
 
 
 
 
48ac593
 
65ce454
 
 
 
48ac593
50ba7ba
48ac593
 
 
50ba7ba
48ac593
50ba7ba
4e778f2
 
48ac593
50ba7ba
 
 
48ac593
50ba7ba
48ac593
 
 
50ba7ba
48ac593
 
 
4e778f2
48ac593
65ce454
0fa3514
 
 
 
 
 
 
 
 
50ba7ba
4e778f2
0fa3514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e778f2
48ac593
e1dabbf
5aa8f9e
48ac593
 
e1dabbf
b22ffb2
48ac593
 
 
 
 
02564e8
ed83876
e32a04d
 
4e778f2
50ba7ba
e32a04d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import difflib
import html
import json
import re

import streamlit as st

def normalize(s):
    """Return a lowercase, whitespace-collapsed form of *s* for comparison.

    The value is stringified first, so numbers and booleans are accepted.
    Underscores, hyphens and dots are treated as word separators, and any
    run of whitespace collapses to a single space.
    """
    separators_to_space = str.maketrans({"_": " ", "-": " ", ".": " "})
    lowered = str(s).lower()
    words = lowered.translate(separators_to_space).split()
    return " ".join(words)

def is_fuzzy_match(a, b, threshold=0.7):
    """Return True when strings *a* and *b* are similar enough to match.

    Two strings match when their ``difflib.SequenceMatcher`` ratio reaches
    *threshold*, or when one is contained in the other.

    Args:
        a: First string (callers pass already-normalized text).
        b: Second string.
        threshold: Minimum similarity ratio, between 0 and 1.

    Returns:
        bool: True on a fuzzy or substring match, False otherwise.
    """
    if difflib.SequenceMatcher(None, a, b).ratio() >= threshold:
        return True
    # The empty string is a substring of every string, so without this guard
    # an empty value would "match" any query (and vice versa). Only apply the
    # containment shortcut when both sides actually carry text.
    if not a or not b:
        return False
    return a in b or b in a

def recursive_fuzzy_value_search(target_value):
    """Find every primitive dict value that fuzzily matches *target_value*.

    Walks each document stored in ``st.session_state.json_data`` (a mapping
    of file name to parsed JSON), descending through nested dicts and lists.
    A value is a candidate when it is a ``str``, ``int``, ``float`` or
    ``bool`` stored under a dict key; it is compared to the target after both
    have been passed through ``normalize``.

    Each value is reported exactly once, in document order. (A previous
    version peeked one level into nested dicts *and* recursed into them,
    which reported every nested value twice.)

    Args:
        target_value: The text to look for.

    Returns:
        list[dict]: One entry per match with the keys ``match_path`` (list of
        path segments; list indices appear as ``"[i]"``), ``matched_value``,
        ``key``, ``record`` (the dict holding the value) and ``file``.
    """
    matches = []
    norm_target = normalize(target_value)
    primitive_types = (str, int, float, bool)

    def _search(obj, path, file_name):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if isinstance(v, primitive_types):
                    if is_fuzzy_match(norm_target, normalize(v)):
                        matches.append({
                            "match_path": path + [k],
                            "matched_value": v,
                            "key": k,
                            "record": obj,
                            "file": file_name,
                        })
                else:
                    # Containers are handled solely by recursion so that a
                    # nested value is never recorded by two code paths.
                    _search(v, path + [k], file_name)
        elif isinstance(obj, list):
            for idx, item in enumerate(obj):
                _search(item, path + [f"[{idx}]"], file_name)

    for file_name, data in st.session_state.json_data.items():
        _search(data, [], file_name)
    return matches

def show_all_strings():
    """List every primitive value found under a dict key in the loaded JSON.

    Despite the name, this covers ``str``, ``int``, ``float`` and ``bool``
    values. It walks each document in ``st.session_state.json_data``,
    descending through nested dicts and lists.

    Each value is listed exactly once, in document order. (A previous version
    peeked one level into nested dicts *and* recursed into them, which listed
    every nested value twice.)

    Returns:
        list[str]: Lines of the form ``"<file> | <dotted.path> = <value>"``;
        list indices appear in the path as ``[i]`` segments.
    """
    found = []
    primitive_types = (str, int, float, bool)

    def recursive(obj, path, file_name):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if isinstance(v, primitive_types):
                    found.append(f"{file_name} | {'.'.join(path + [k])} = {v}")
                else:
                    # Nested containers are covered by recursion alone so
                    # their values are not emitted twice.
                    recursive(v, path + [k], file_name)
        elif isinstance(obj, list):
            for idx, item in enumerate(obj):
                recursive(item, path + [f"[{idx}]"], file_name)

    for file_name, data in st.session_state.json_data.items():
        recursive(data, [], file_name)
    return found

def handle_user_query(query):
    """Extract a search term from *query* and report every fuzzy match.

    The query is tried against a few known phrasings ("last login for X",
    "when was X last logged in", "info for X", ...). The first phrasing that
    matches supplies the search term; if none yields a non-empty term, the
    first run of allowed characters starting with a letter or digit is used.

    Returns:
        str: A Markdown answer with one paragraph per match, or a message
        explaining that nothing was found / no search term was detected.
    """
    phrasings = (
        r"(?:last\s*login.*?for|when\s+did)\s+([a-zA-Z0-9 _\-\.@]+)",
        r"when\s+was\s+([a-zA-Z0-9 _\-\.@]+)\s+last\s+(?:login|logged\s*in)",
        r"last\s*login\s*of\s+([a-zA-Z0-9 _\-\.@]+)",
        r"(?:info|details|record) for\s+([a-zA-Z0-9 _\-\.@]+)",
    )

    # Lazily evaluate the phrasings and stop at the first one that matches.
    attempts = (re.search(p, query, re.IGNORECASE) for p in phrasings)
    first_hit = next((m for m in attempts if m), None)
    search_term = first_hit.group(1).strip() if first_hit else None

    if not search_term:
        # Fallback: any word/phrase of 1+ char (letters, digits, spaces,
        # dashes, underscores, dots)
        loose = re.search(r"([A-Za-z0-9][A-Za-z0-9 _\-\.@]*)", query)
        if loose:
            search_term = loose.group(1).strip()

    if not search_term:
        return "No valid search value detected. Try a person's name, number, product, device, etc."

    hits = recursive_fuzzy_value_search(search_term)
    if not hits:
        return f"No records found for '{search_term}' in any file."

    return "\n\n".join(
        f"**{hit['matched_value']}** (in file `{hit['file']}` | key: `{hit['key']}` | path: `{'.'.join(hit['match_path'])}`)"
        for hit in hits
    )

# --- Streamlit UI setup ---
# Seed the session-state slots the rest of the script relies on. Streamlit
# re-runs this file on every interaction, so each slot is only created once.
if "json_data" not in st.session_state:
    st.session_state.json_data = {}
if "messages" not in st.session_state:
    st.session_state.messages = []
if "temp_input" not in st.session_state:
    st.session_state.temp_input = ""
if "files_loaded" not in st.session_state:
    st.session_state.files_loaded = False
if "loaded_signature" not in st.session_state:
    # Identifies which set of uploads is currently parsed into json_data.
    st.session_state.loaded_signature = None

st.set_page_config(page_title="Flexible JSON Fuzzy Search", layout="wide")
st.title("Instant JSON-Backed Q&A (Flexible Fuzzy Search — All Keys & Types!)")

uploaded_files = st.sidebar.file_uploader(
    "Choose one or more JSON files", type="json", accept_multiple_files=True
)

# A plain "already loaded" flag would ignore files added or swapped after the
# first upload. Comparing a (name, size) signature of the current selection
# reloads exactly when the selection changes, and still avoids re-parsing
# (and wiping the chat history) on every unrelated re-run.
current_signature = (
    tuple(sorted((f.name, f.size) for f in uploaded_files))
    if uploaded_files
    else None
)

if uploaded_files and current_signature != st.session_state.loaded_signature:
    st.session_state.json_data.clear()
    for f in uploaded_files:
        try:
            # A file kept from an earlier run may already have been read to
            # the end; rewind so it parses again.
            f.seek(0)
            content = json.load(f)
            st.session_state.json_data[f.name] = content
            st.sidebar.success(f"Loaded: {f.name}")
        except Exception as e:
            # UI boundary: report the problem and keep loading the others.
            st.sidebar.error(f"Error reading {f.name}: {e}")
    # A new data set invalidates the previous conversation.
    st.session_state.messages = []
    st.session_state.files_loaded = True
    st.session_state.loaded_signature = current_signature
elif not uploaded_files:
    st.session_state.json_data.clear()
    st.session_state.files_loaded = False
    st.session_state.loaded_signature = None

st.markdown("### Ask about ANY value (name, product, number, device, etc) — partials/typos/substring OK!")
# Replay the conversation so far; user and agent lines get different colours.
for msg in st.session_state.messages:
    if msg["role"] == "user":
        # The user's text is interpolated into raw HTML, so escape it:
        # otherwise typing "<" or a tag would break or hijack the markup.
        safe_text = html.escape(msg["content"])
        st.markdown(f"<div style='color: #4F8BF9;'><b>User:</b> {safe_text}</div>", unsafe_allow_html=True)
    else:
        # NOTE(review): agent content is left unescaped because it carries
        # Markdown (bold / code spans) built by handle_user_query. It embeds
        # values taken from the uploaded JSON, so those are rendered as HTML
        # if they contain tags; consider escaping them where the answer is
        # built.
        st.markdown(f"<div style='color: #1C6E4C;'><b>Agent:</b> {msg['content']}</div>", unsafe_allow_html=True)

def send_message():
    """Handle a submitted chat message (``on_change`` callback of the input).

    Reads the pending text from ``st.session_state.temp_input``. Blank input
    is ignored. Otherwise the user's message and the answer produced by
    ``handle_user_query`` are appended to ``st.session_state.messages`` and
    the input box is cleared.
    """
    text = st.session_state.temp_input
    if not text.strip():
        return

    history = st.session_state.messages
    # Record the question before answering it, so it stays in the history
    # even if building the answer fails.
    history.append({"role": "user", "content": text})
    reply = handle_user_query(text)
    history.append({"role": "assistant", "content": reply})
    st.session_state.temp_input = ""

# The chat widgets are only offered once at least one JSON document is loaded.
if not st.session_state.json_data:
    st.info("Please upload at least one JSON file to start chatting.")
else:
    # Submitting the text box triggers send_message via on_change.
    st.text_input("Your message:", key="temp_input", on_change=send_message)
    dump_requested = st.button("Show all values in uploaded JSONs")
    if dump_requested:
        st.write(show_all_strings())