Spaces:

Haleshot
/

sample1

Sleeping

App Files Files Community

sample1 / app.py

Haleshot

push new demo

b7db177 unverified about 1 year ago

raw

history blame contribute delete

7.14 kB

	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "groq==0.18.0",
	# "pandas==2.2.3",
	# "marimo",
	# ]
	# ///

	import marimo

	# Version stamp for this notebook file (presumably the marimo release that
	# generated it — confirm before editing by hand).
	__generated_with = "0.11.6"
	# The notebook application object: every cell below registers itself on it
	# through the `@app.cell` decorator, and the entry point calls `app.run()`.
	app = marimo.App(width="medium")


	@app.cell
	def _():
	    # Expose marimo under the short alias `mo`; the UI and markdown cells
	    # of this notebook all take `mo` as an input.
	    import marimo as mo
	    return (mo,)


	@app.cell
	def _(mo):
	    # Password-style text box for the Groq API key; its `.value` is read by
	    # the cell that builds the Groq client.
	    groq_api_key = mo.ui.text(
	        kind='password',
	        label='Enter your groq api key here',
	    )
	    # Bare expression kept as the last statement so the widget is shown.
	    groq_api_key
	    return (groq_api_key,)


	@app.cell(hide_code=True)
	def _(mo):
	    # Static introduction: explains the few-shot NER approach and shows the
	    # default prompt. The text is held in a cell-private name and rendered
	    # by the trailing `mo.md(...)` expression.
	    _intro = r"""
	# LLM for NER

	- do a [Few-shot prompting](https://huggingface.co/docs/transformers/main/en/tasks/prompting#few-shot-prompting) and repeat multiple times.

	The default prompt is:
	```python
	Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
	The available entities are: "ADE", "Dosage", "Drug", "Duration", "Form", "Frequency", "Reason", "Route" and "Strength".

	For example:
	Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
	Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>

	Here is your task:
	Text: The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours.
	Named Entities:

	Remember to answer in the exact form of the example.
	```
	"""
	    mo.md(_intro)
	    return


	@app.cell
	def _(mo, models):
	    # Experiment controls: number of Groq calls, model choice, optional
	    # custom text and tag list, and the checkbox that gates the API calls.
	    call_groq_times = mo.ui.slider(
	        start=10,
	        stop=100,
	        step=10,
	        value=30,
	        label='How many times do you want to call groq',
	    )
	    model_ner = mo.ui.dropdown(
	        options=models,
	        value="llama3-8b-8192",
	        label="Choose a LLM",
	    )
	    ner_text = mo.ui.text_area(
	        value="",
	        label='Type your text here or leave it to default:',
	    )
	    ner_tags = mo.ui.text_area(
	        value="",
	        label="Type the ner tags here or leave it to default:",
	        placeholder='e.g. Country, Person',
	    )
	    checkbox_ner = mo.ui.checkbox(label=' Whether to call groq api')
	    # Layout rows, top to bottom; the stacked result is the cell's output.
	    _rows = [
	        mo.md("# Experiment"),
	        mo.hstack([call_groq_times, model_ner]),
	        mo.hstack([ner_text, ner_tags]),
	        checkbox_ner,
	    ]
	    mo.vstack(_rows, align='center')
	    return call_groq_times, checkbox_ner, model_ner, ner_tags, ner_text


	@app.cell
	def _(
	    call_groq_times,
	    chat_completion,
	    checkbox_ner,
	    client,
	    extract_ner_from_assistant,
	    mo,
	    model_ner,
	    prompt_ner,
	):
	    # Send the prompt to Groq `call_groq_times.value` times (only when the
	    # checkbox is ticked) and pool every parsed entity record in `result`.
	    result = []
	    # Bound up front so the return below is valid even when the checkbox is
	    # off or every call fails; otherwise `answer` would be undefined there.
	    answer = None
	    _failures = []
	    if checkbox_ner.value:
	        for _ in mo.status.progress_bar(
	            range(call_groq_times.value),
	            title='In Progress …',
	            completion_title='Finished.',
	        ):
	            try:
	                answer = chat_completion(client, prompt_ner, model_ner.value)
	                result += extract_ner_from_assistant(answer)
	            except Exception as _exc:
	                # Best effort: one failed request or unparsable reply must
	                # not abort the remaining calls, but it is recorded so the
	                # failure is not silently lost.
	                _failures.append(repr(_exc))
	    if _failures:
	        print(
	            f"{len(_failures)} of {call_groq_times.value} groq calls failed; "
	            f"last error: {_failures[-1]}"
	        )
	    return answer, result


	@app.cell
	def _(mo, pd, result):
	    # Turn the pooled entity records into a DataFrame and wrap it in
	    # marimo's interactive dataframe transformer.
	    data = pd.DataFrame.from_dict(result)
	    # `df` is only filled by the fallback path; bind it here so the return
	    # statement is valid when the primary path succeeds.
	    df = None
	    try:
	        transformed_df = mo.ui.dataframe(data)
	    except Exception:
	        # Fallback: show a saved copy instead. NOTE(review): 'data.csv' is
	        # not written anywhere in this notebook — presumably a previously
	        # exported `data.to_csv('data.csv')`; confirm it ships with the app.
	        df = pd.read_csv('data.csv')
	        transformed_df = mo.ui.dataframe(df)
	    return data, df, transformed_df


	@app.cell
	def _(mo, transformed_df):
	    # Caption followed by the interactive dataframe, stacked vertically as
	    # the cell's output.
	    _caption = mo.md(
	        r"""
	The results are shown below, use __+ Add__ to apply different transforms and explore more:
	------
	"""
	    )
	    mo.vstack([_caption, transformed_df])
	    return


	@app.cell
	async def _():
	    # Shared imports for the notebook, plus runtime installation of the
	    # Groq client when running under Pyodide/WebAssembly.
	    import os
	    import pandas as pd
	    from functools import reduce
	    try:
	        # micropip is the in-browser (Pyodide) package installer and is
	        # normally absent from a regular Python environment.
	        import micropip
	    except ImportError:
	        # Native run: the script header already declares groq and pandas
	        # as dependencies, so nothing has to be installed here. The name
	        # stays bound because the cell returns it.
	        micropip = None
	    else:
	        await micropip.install("ssl")
	        await micropip.install("groq")
	    from groq import Groq
	    return Groq, micropip, os, pd, reduce


	@app.cell
	def _():
	    # Model identifiers offered in the "Choose a LLM" dropdown; the first
	    # entry is the one the dropdown preselects.
	    models = [
	        "llama3-8b-8192", "llama3-70b-8192",
	        "llama2-70b-4096",
	        "mixtral-8x7b-32768",
	        "gemma-7b-it",
	    ]
	    return (models,)


	@app.cell
	def _(Groq, groq_api_key):
	    # Build the Groq client from whatever is currently typed into the
	    # API-key box.
	    _key = groq_api_key.value
	    client = Groq(api_key=_key)
	    return (client,)


	@app.cell
	def _():
	    # Defaults used by the prompt cell when the user leaves the text and
	    # tag inputs empty.
	    default_sentence = "The patient then developed oral sores and rash in the chest the night before admission which rapidly spread to the face , trunk , and upper extremities within the last 24 hours."
	    # Nine separate tags. The previous `"Route" and "Strength"` was a
	    # boolean expression that evaluates to "Strength" alone, which silently
	    # dropped "Route" from the list.
	    default_tags = [
	        "ADE",
	        "Dosage",
	        "Drug",
	        "Duration",
	        "Form",
	        "Frequency",
	        "Reason",
	        "Route",
	        "Strength",
	    ]
	    return default_sentence, default_tags


	@app.cell
	def _(default_sentence, default_tags, ner_tags, ner_text):
	    # Assemble the few-shot NER prompt from the user's inputs, falling back
	    # to the defaults when an input is empty.
	    #
	    # Tags are comma-separated; each one is stripped so that input such as
	    # "Country, Person" does not produce a tag with a leading space, and
	    # empty pieces (stray commas, blank input) are ignored.
	    _tags = [_t.strip() for _t in ner_tags.value.split(',') if _t.strip()] or default_tags
	    # Whitespace-only text counts as "not provided".
	    _text = ner_text.value.strip() or default_sentence
	    prompt_ner = fr"""Return a list of named entities in the text with your confidence score on a scale of 0 to 1 for this tag.
	The available entities are: {_tags}.

	For example:
	Text: MEDICATIONS : Lipitor , Tylenol with Codeine , Dilantin , previously on Decadron q.i.d .
	Named Entities: <start> Lipitor (Drug: 0.87), Tylenol (Drug: 0.59) <end>

	Here is your task:
	Text: {_text}
	Named Entities:

	Remember to answer in the exact form of the example.
	"""
	    # Bare expression: shows the final prompt as the cell's output.
	    prompt_ner
	    return (prompt_ner,)


	@app.cell
	def _():
	    def chat_completion(client, prompt, model):
	        """Send one NER prompt to the chat-completions endpoint and return the reply text.

	        Args:
	            client: object exposing `chat.completions.create(...)` (the Groq
	                client built elsewhere in this notebook).
	            prompt: user message to send.
	            model: identifier of the language model that generates the reply.

	        Returns:
	            The `message.content` of the first choice in the response.
	        """
	        conversation = [
	            # System message framing the task.
	            {"role": "system", "content": "you will help me with some NER tasks."},
	            # User message the assistant responds to.
	            {"role": "user", "content": prompt},
	        ]
	        response = client.chat.completions.create(
	            messages=conversation,
	            model=model,
	            temperature=0.5,
	            max_tokens=100,
	            top_p=1,
	            # Generation stops at the closing marker used in the prompt's example.
	            stop='<end>',
	            # One complete response rather than partial message deltas.
	            stream=False,
	        )
	        return response.choices[0].message.content
	    return (chat_completion,)


	@app.cell
	def _(reduce):
	def extract_ner_from_assistant(answer: str) -> list[dict]:
	    """Parse an assistant reply into a list of entity records.

	    The reply is expected to look like
	    ``... <start> oral sores (ADE: 0.98), rash (ADE: 0.98) <end>``.
	    Everything up to and including the ``<start>`` token is ignored, as is
	    everything from an ``<end>`` token onwards (the marker may be missing
	    when generation was stopped on it).

	    Args:
	        answer: raw text returned by the model.

	    Returns:
	        One dict per parsed entity with the keys ``"named entity"`` (str),
	        ``"tag"`` (str) and ``"score"`` (float). Records that do not match
	        the ``name (Tag: score)`` shape are skipped; a reply without a
	        ``<start>`` token yields an empty list.
	    """
	    tokens = answer.split()
	    if "<start>" not in tokens:
	        return []
	    tokens = tokens[tokens.index("<start>") + 1:]
	    if "<end>" in tokens:
	        tokens = tokens[:tokens.index("<end>")]
	    # e.g. ['oral sores (ADE: 0.98)', ' rash (ADE: 0.98)']
	    records = " ".join(tokens).split(",")
	    entities = []
	    for record in records:
	        parts = record.split()
	        if len(parts) < 2:
	            # Empty or truncated record: nothing to extract.
	            continue
	        # The last two tokens are "(Tag:" and "score)"; anything before
	        # them is the (possibly multi-word) entity text.
	        *name_parts, raw_tag, raw_score = parts
	        try:
	            score = float(raw_score.rstrip(")"))
	        except ValueError:
	            # Score is not a number, so the record is malformed.
	            continue
	        entities.append(
	            {
	                "named entity": " ".join(name_parts),
	                "tag": raw_tag.strip("(:"),
	                "score": score,
	            }
	        )
	    return entities
	return (extract_ner_from_assistant,)


	if __name__ == "__main__":
	    # Run the notebook application when the file is executed as a script.
	    app.run()