from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
import os

app = Flask(__name__)

# llama.cpp settings: 0 GPU layers keeps inference on the CPU; raise this
# value to offload layers to a GPU-enabled build of llama.cpp.
n_gpu_layers = 0
n_batch = 1024  # number of tokens processed per batch

MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"

# Load the quantized Phi-3 model. n_ctx must cover the prompt (the CV body)
# plus the generated completion; max_tokens caps the completion length.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.1,
    max_tokens=256,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=4096,
    verbose=True,
)

model_stat = os.stat(MODEL_PATH)
print("model size ====> :", model_stat.st_size, "bytes")
@app.route('/', methods=['POST'])
def get_skills():
    data = request.get_json(silent=True) or {}
    cv_body = data.get('cv_body')
    if not cv_body:
        return jsonify({'error': 'No cv_body provided'}), 400

    # Phi-3 chat format: the question belongs in the user turn, and the
    # assistant tag comes last so generation starts at the model's answer.
    prompt = (
        f"<|user|>\n{cv_body}\n"
        "Can you list the skills mentioned in the CV?<|end|>\n"
        "<|assistant|>"
    )
    output = llm.invoke(prompt, stop=["<|end|>"])

    return jsonify({'skills': output})


if __name__ == '__main__':
    app.run()
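
# Example client call (hypothetical, assuming the service runs on Flask's
# default port 5000):
#
#   import requests
#   resp = requests.post("http://127.0.0.1:5000/", json={"cv_body": "..."})
#   print(resp.json()["skills"])


# ---------------------------------------------------------------------------
# Second service (a separate file, run as its own process): embeds texts with
# Word2Vec, scores an input text against a small corpus with cosine
# similarity, and returns the scores plus a base64-encoded bar chart.
# ---------------------------------------------------------------------------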
from flask import Flask, request, jsonify
import nltk
from gensim.models import Word2Vec
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib
matplotlib.use('Agg')  # headless backend: render plots without a display server
import matplotlib.pyplot as plt
import io
import base64

nltk.download('punkt')  # tokenizer data required by nltk.word_tokenize

app = Flask(__name__)

# Toy corpus the input text is compared against.
texts = [
    "This is a sample text.",
    "Another example of text.",
    "More texts to compare."
]

tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]

# Train a small Word2Vec model on the corpus. With only three sentences the
# embeddings are purely illustrative; in practice a pretrained model would be
# loaded instead.
word_embeddings_model = Word2Vec(
    sentences=tokenized_texts, vector_size=100, window=5, min_count=1, workers=4
)
def text_embedding(text):
    """Mean-pool the Word2Vec vectors of a text's tokens."""
    words = nltk.word_tokenize(text.lower())
    embeddings = [word_embeddings_model.wv[word] for word in words if word in word_embeddings_model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    # No token was in the vocabulary: fall back to a zero vector.
    return np.zeros(word_embeddings_model.vector_size)
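
# For instance, text_embedding("another sample text") yields a 100-dimensional
# numpy vector (the mean of the in-vocabulary word vectors), matching the
# vector_size chosen for the Word2Vec model above.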
@app.route('/process', methods=['POST'])
def process():
    data = request.get_json(silent=True) or {}
    input_text = data.get('input_text', '')

    if not input_text:
        return jsonify({'error': 'No input text provided'}), 400

    input_embedding = text_embedding(input_text)
    text_embeddings = [text_embedding(text) for text in texts]

    # Cosine similarity lies in [-1, 1]; scale to a percentage for display.
    # float() converts numpy scalars to plain Python floats so jsonify can
    # serialize them.
    similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
    similarities_percentages = [float(similarity) * 100 for similarity in similarities]
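
    # For reference, the cosine similarity between vectors u and v is
    # (u . v) / (||u|| * ||v||); the sklearn call above is equivalent to:
    #
    #   sim = float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))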
    # Bar chart of the similarity scores.
    fig, ax = plt.subplots(figsize=(10, 6))
    texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
    ax.bar(texts_for_plotting, similarities_percentages)
    ax.set_ylabel('Similarity (%)')
    ax.set_xlabel('Texts')
    ax.set_title('Similarity of input text with corpus texts')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Encode the PNG as base64 so it can be embedded in the JSON response.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode('utf-8')
    plt.close(fig)
    # Rank corpus texts by similarity and keep the top three.
    sorted_indices = np.argsort(similarities)[::-1]
    similar_texts = [(float(similarities[idx]) * 100, texts[idx]) for idx in sorted_indices[:3]]

    response = {
        'similarities': similarities_percentages,
        'plot': img_base64,
        'most_similar_texts': similar_texts
    }

    return jsonify(response)


if __name__ == '__main__':
    # debug=True is convenient for local development; disable it in production.
    app.run(host='0.0.0.0', port=8080, debug=True)
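
# Example client call (hypothetical, against the service above on port 8080):
#
#   import requests
#   resp = requests.post("http://127.0.0.1:8080/process",
#                        json={"input_text": "A sample text to compare."})
#   body = resp.json()
#   print(body["most_similar_texts"])   # top-3 (score, text) pairs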