Spaces:

vortexa64
/

chatbot

Paused

App Files Files Community

chatbot / app.py

vortexa64

Create app.py

800491f verified 7 months ago

raw

history blame contribute delete

2.83 kB

	import numpy as np
	import json
	import gradio as gr

	# === LOAD CORPUS & DATASET ===
	# corpus.txt is split into lines; each line is later fed to training as
	# one sentence.
	with open("corpus.txt", "r", encoding="utf-8") as corpus_file:
	    corpus = corpus_file.read().splitlines()

	# dataset.json is parsed as JSON; the rest of the script treats it as a
	# mapping whose keys (questions) and values (answers) are strings.
	with open("dataset.json", "r", encoding="utf-8") as dataset_file:
	    qa_data = json.loads(dataset_file.read())

	# === BUILD VOCAB ===
	# The vocabulary is every whitespace-separated token found in the corpus
	# lines and in the Q&A keys and values, plus the "<END>" marker.
	all_texts = corpus + list(qa_data.keys()) + list(qa_data.values())

	# "<END>" is appended to every pretraining sentence and is the stop token
	# checked during generation, so it must have an id of its own. Without it
	# the marker one-hot encodes to an all-zero vector and can never be
	# predicted.
	# sorted() gives every word the same id on every run; iterating a set of
	# strings directly can change order between interpreter runs.
	vocab = sorted(set(" ".join(all_texts).split()) | {"<END>"})

	# Forward and reverse lookup tables between words and integer ids.
	word2id = {w: i for i, w in enumerate(vocab)}
	id2word = {i: w for w, i in word2id.items()}
	vocab_size = len(vocab)

	def one_hot(word):
	    """Return the one-hot vector of length ``vocab_size`` for *word*.

	    A word that is not in ``word2id`` yields an all-zero vector.
	    """
	    encoded = np.zeros(vocab_size)
	    index = word2id.get(word)
	    if index is not None:
	        encoded[index] = 1
	    return encoded

	# === MODEL PARAMETERS ===
	lr = 0.05  # step size applied to every weight update
	hidden_size = 512  # can be changed

	# Seed first so the two weight matrices are drawn identically on each run;
	# W1 must be drawn before W2 to keep the same random stream.
	np.random.seed(42)
	W1 = 0.01 * np.random.randn(vocab_size, hidden_size)  # input -> hidden
	W2 = 0.01 * np.random.randn(hidden_size, vocab_size)  # hidden -> output

	def softmax(x):
	    """Return the softmax of *x*, normalised over all of its elements.

	    The maximum is subtracted before exponentiating so that large inputs
	    do not overflow ``np.exp``.
	    """
	    shifted = x - np.max(x)
	    exps = np.exp(shifted)
	    return exps / exps.sum()

	def train_step(sentence):
	    """Run one pass of next-word training over *sentence*.

	    The sentence is split on whitespace and every adjacent pair
	    (current word, next word) is used as one training example: the
	    current word's row of ``W1`` is the hidden vector, ``W2`` maps it to
	    logits, and both matrices are updated in place by one gradient step
	    of size ``lr``.

	    Pairs in which either word is missing from ``word2id`` are skipped.
	    An unknown input word never produced an update anyway (its one-hot
	    vector is all zeros), but an unknown target word used to be encoded
	    as an all-zero target, which pushed every logit down instead of
	    teaching anything.

	    Args:
	        sentence: Whitespace-separated training text.

	    Returns:
	        The summed cross-entropy loss over the pairs that were trained
	        on (0.0 when there were none). Cross-entropy is reported because
	        the update ``y_pred - y_true`` is its gradient with respect to
	        the logits.
	    """
	    global W1, W2
	    words = sentence.split()
	    loss = 0.0
	    for current, target in zip(words, words[1:]):
	        src = word2id.get(current)
	        dst = word2id.get(target)
	        if src is None or dst is None:
	            continue

	        # Selecting row `src` equals multiplying a one-hot vector by W1,
	        # without building the vector or touching the other rows.
	        h = W1[src]
	        y_pred = softmax(np.dot(h, W2))

	        # Small epsilon guards against log(0) if a probability underflows.
	        loss += -np.log(y_pred[dst] + 1e-12)

	        # Gradient of softmax + cross-entropy w.r.t. the logits.
	        grad_o = y_pred.copy()
	        grad_o[dst] -= 1.0

	        # Back-propagate through W2 *before* W2 is modified.
	        grad_h = np.dot(W2, grad_o)

	        # h is a view into W1, so W2 must be updated before W1's row is.
	        W2 -= lr * np.outer(h, grad_o)
	        W1[src] -= lr * grad_h
	    return loss

	# === PRETRAIN ON THE CORPUS ===
	# Each corpus line is trained with an explicit end marker appended.
	for epoch in range(200):
	    total_loss = sum(train_step(line + " <END>") for line in corpus)
	    if not epoch % 50:
	        print(f"Pretrain Epoch {epoch}, Loss: {total_loss:.4f}")

	# === FINE-TUNE ON Q&A ===
	# Each question and its answer are joined into a single sentence.
	for epoch in range(200):
	    total_loss = sum(train_step(q + " " + a) for q, a in qa_data.items())
	    if not epoch % 50:
	        print(f"Finetune Epoch {epoch}, Loss: {total_loss:.4f}")

	# === GENERATE ANSWER ===
	def generate_reply(question, max_len=30):
	    """Generate a reply by greedily following next-word predictions.

	    Only the first word of *question* seeds the generation. From there
	    the most probable next word is chosen repeatedly (argmax, so the
	    output is consistent for a given model) until ``"<END>"`` is
	    predicted or *max_len* words have been produced.

	    Args:
	        question: The user's text. Empty, whitespace-only or ``None``
	            input is treated like an unknown word instead of raising
	            ``IndexError``.
	        max_len: Maximum number of words in the reply.

	    Returns:
	        The generated words joined by spaces, or a fixed apology message
	        when there is no known first word to start from.
	    """
	    words = (question or "").split()
	    if not words or words[0] not in word2id:
	        return "Maaf, aku belum ngerti kata itu 🥺"

	    current_id = word2id[words[0]]
	    reply = []
	    for _ in range(max_len):
	        # Row lookup is the same as one_hot(word) @ W1.
	        h = W1[current_id]
	        y_pred = softmax(np.dot(h, W2))
	        current_id = int(np.argmax(y_pred))  # argmax keeps output consistent
	        pred_word = id2word[current_id]
	        if pred_word == "<END>":
	            break
	        reply.append(pred_word)
	    return " ".join(reply)

	# === GRADIO INTERFACE ===
	def chatbot(input_text):
	    """Callback handed to the Gradio interface; returns the generated reply."""
	    reply = generate_reply(input_text)
	    return reply

	# Text-in / text-out interface wired to the chatbot callback.
	demo = gr.Interface(
	    fn=chatbot,
	    inputs="text",
	    outputs="text",
	    title="Chatbot Numpy ala Cici 🤭",
	    description="Mini chatbot dengan training 2 tahap: corpus + Q&A",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()