welyjesch committed on
Commit
f71c5f7
·
verified ·
1 Parent(s): 5c19909

Upload Filipino_Llama3_1_Inference_Only.ipynb

Browse files
Filipino_Llama3_1_Inference_Only.ipynb ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "source": [
6
+ "%pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
7
+ "%pip install peft transformers accelerate bitsandbytes\n"
8
+ ],
9
+ "metadata": {
10
+ "id": "RTea66-cz-9p"
11
+ },
12
+ "execution_count": null,
13
+ "outputs": []
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "source": [
18
+ "#@title This uses Unsloth and the fine-tuned lora for faster inference\n",
19
+ "from unsloth import FastLanguageModel\n",
20
+ "from peft import PeftModel\n",
21
+ "import torch\n",
22
+ "\n",
23
+ "max_seq_length = 2048\n",
24
+ "\n",
25
+ "# Start from the base checkpoint; it has to be the same base model that the\n",
26
+ "# LoRA adapter was trained on.\n",
27
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
28
+ "    model_name=\"unsloth/Llama-3.1-8B\",\n",
29
+ "    max_seq_length=max_seq_length,\n",
30
+ "    dtype=None,\n",
31
+ "    load_in_4bit=True,\n",
32
+ ")\n",
33
+ "\n",
34
+ "# Attach the fine-tuned LoRA adapter hosted on HuggingFace to the base model.\n",
35
+ "lora_adapter_id = \"welyjesch/filipino_llama_3.1_FT_lora\"\n",
36
+ "model = PeftModel.from_pretrained(model, lora_adapter_id)\n",
37
+ "\n",
38
+ "# Turn on Unsloth's optimized inference path for the combined model; the\n",
39
+ "# call's return value is not needed.\n",
40
+ "FastLanguageModel.for_inference(model)\n",
41
+ "\n"
42
+ ],
43
+ "metadata": {
44
+ "id": "xXc4bcMG4_0e"
45
+ },
46
+ "execution_count": null,
47
+ "outputs": []
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "source": [
52
+ "#@title Run this after entering your instructions\n",
53
+ "import textwrap\n",
54
+ "\n",
55
+ "alpaca_prompt = \"\"\"Ang nasa ibaba ay isang instruksyon na naglalarawan ng isang gawain. Sumulat ng isang tugon na angkop na kumukumpleto sa kahilingan.\n",
56
+ "\n",
57
+ "### Instruction:\n",
58
+ "{}\n",
59
+ "\n",
60
+ "### Response:\n",
61
+ "{}\n",
62
+ "\"\"\"\n",
63
+ "\n",
64
+ "instructions = \"Gumawa ng isang kantang Tagalog\" # @param {type:\"string\"}\n",
65
+ "\n",
66
+ "# Fill only the instruction slot; the response slot is left empty so the model\n",
67
+ "# writes the reply.\n",
68
+ "# NOTE(review): the template keeps a newline after the response slot, so this\n",
69
+ "# prompt ends with a blank line - confirm that matches the fine-tuning format.\n",
70
+ "prompt = alpaca_prompt.format(instructions, \"\")\n",
71
+ "\n",
72
+ "# Move the encoded prompt to the device the model itself reports instead of a\n",
73
+ "# hard-coded CUDA device name.\n",
74
+ "inputs = tokenizer([prompt], return_tensors=\"pt\").to(model.device)\n",
75
+ "\n",
76
+ "outputs = model.generate(**inputs, max_new_tokens=256, use_cache=True)\n",
77
+ "\n",
78
+ "# Only one prompt was submitted, so take the first decoded entry.\n",
79
+ "raw_output = tokenizer.batch_decode(outputs)[0]\n",
80
+ "\n",
81
+ "# Wrap every line on its own at 80 columns so existing line breaks are kept.\n",
82
+ "wrapped_lines = [textwrap.fill(line, width=80) for line in raw_output.splitlines()]\n",
83
+ "wrapped_output = '\\n'.join(wrapped_lines)\n",
84
+ "\n",
85
+ "print(wrapped_output)"
86
+ ],
87
+ "metadata": {
88
+ "id": "PO0DWOjw568p"
89
+ },
90
+ "execution_count": null,
91
+ "outputs": []
92
+ }
93
+ ],
94
+ "metadata": {
95
+ "colab": {
96
+ "provenance": [],
97
+ "gpuType": "T4"
98
+ },
99
+ "kernelspec": {
100
+ "display_name": "Python 3",
101
+ "name": "python3"
102
+ },
103
+ "language_info": {
104
+ "name": "python"
105
+ },
106
+ "accelerator": "GPU"
107
+ },
108
+ "nbformat": 4,
109
+ "nbformat_minor": 0
110
+ }