| from llama_index.llms.huggingface import HuggingFaceLLM, HuggingFaceInferenceAPI |
| from llama_index.llms.openai import OpenAI |
| from llama_index.llms.replicate import Replicate |
|
|
| from dotenv import load_dotenv |
| import os |
| import streamlit as st |
|
|
# Load environment variables (e.g. provider API keys) from a local .env file.
load_dotenv()
|
|
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
# Allow-list of supported models: maps each model identifier to the provider
# ("source") that serves it. load_llm() returns None for any model_name that
# is not a key of this dict.
integrated_llms = {
    "gpt-3.5-turbo-0125": "openai",
    "meta/llama-2-13b-chat": "replicate",
    "mistralai/Mistral-7B-Instruct-v0.2": "huggingface",
}
|
|
|
|
def load_llm(model_name: str, source: str = "huggingface"):
    """Instantiate an LLM client for one of the integrated models.

    Args:
        model_name: Model identifier; must be a key of ``integrated_llms``.
        source: Provider name — "openai", "replicate", or "huggingface".

    Returns:
        A configured LLM instance, or ``None`` when the model is not in the
        allow-list or instantiation raises.
    """
    print("model_name: ", model_name, "source: ", source)
    if integrated_llms.get(model_name) is None:
        # Unknown model — refuse rather than forwarding an arbitrary name.
        return None
    try:
        if source.startswith("openai"):
            return OpenAI(
                model=model_name,
                api_key=st.session_state.openai_api_key,
            )
        elif source.startswith("replicate"):
            # BUG FIX: the API token was previously passed as ``prompt_key``,
            # which names the model's prompt input field, not credentials —
            # so the token never authenticated and prompts were sent under a
            # bogus key. The Replicate client reads credentials from the
            # REPLICATE_API_TOKEN environment variable, so export it there.
            os.environ["REPLICATE_API_TOKEN"] = st.session_state.replicate_api_token
            return Replicate(
                model=model_name,
                is_chat_model=True,
                additional_kwargs={"max_new_tokens": 250},
            )
        elif source.startswith("huggingface"):
            return HuggingFaceInferenceAPI(
                model_name=model_name,
                token=st.session_state.hf_token,
            )
    except Exception as e:
        # Best-effort: log and fall through to None so the Streamlit UI can
        # surface a friendly failure instead of crashing the app.
        print(e)
    # Unrecognized source or failed instantiation.
    return None
|
|