root1091 committed on
Commit
6a6df5e
·
1 Parent(s): 470cacb

testing Pali Gemma for the first time

Browse files
Files changed (1) hide show
  1. paligemma_testing.ipynb +102 -0
paligemma_testing.ipynb ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "application/vnd.jupyter.widget-view+json": {
11
+ "model_id": "77414e9d91534e578d51cced47102e57",
12
+ "version_major": 2,
13
+ "version_minor": 0
14
+ },
15
+ "text/plain": [
16
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
17
+ ]
18
+ },
19
+ "metadata": {},
20
+ "output_type": "display_data"
21
+ },
22
+ {
23
+ "name": "stderr",
24
+ "output_type": "stream",
25
+ "text": [
26
+ "You're using a GemmaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
27
+ ]
28
+ },
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "\n",
34
+ "TAX INVOICE\n",
35
+ "Bill No. 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\n"
36
+ ]
37
+ }
38
+ ],
39
+ "source": [
40
+ "from transformers import AutoProcessor, PaliGemmaForConditionalGeneration\n",
41
+ "import requests\n",
42
+ "from PIL import Image\n",
43
+ "\n",
44
+ "model_id = \"google/paligemma-3b-mix-224\"\n",
45
+ "model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)\n",
46
+ "processor = AutoProcessor.from_pretrained(model_id)\n",
47
+ "\n",
48
+ "prompt = \"ocr\"\n",
49
+ "image_file = \"sample_invoice.png\"\n",
50
+ "raw_image = Image.open(image_file)\n",
51
+ "inputs = processor(prompt, raw_image, return_tensors=\"pt\")\n",
52
+ "output = model.generate(**inputs, max_new_tokens=100)\n",
53
+ "\n",
54
+ "print(processor.decode(output[0], skip_special_tokens=True)[len(prompt):])\n",
55
+ "# bee\n"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 4,
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "GPU is not available\n"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "import torch\n",
73
+ "\n",
74
+ "if torch.cuda.is_available():\n",
75
+ " print(\"GPU is available\")\n",
76
+ "else:\n",
77
+ " print(\"GPU is not available\")"
78
+ ]
79
+ }
80
+ ],
81
+ "metadata": {
82
+ "kernelspec": {
83
+ "display_name": "gemini_gemma",
84
+ "language": "python",
85
+ "name": "python3"
86
+ },
87
+ "language_info": {
88
+ "codemirror_mode": {
89
+ "name": "ipython",
90
+ "version": 3
91
+ },
92
+ "file_extension": ".py",
93
+ "mimetype": "text/x-python",
94
+ "name": "python",
95
+ "nbconvert_exporter": "python",
96
+ "pygments_lexer": "ipython3",
97
+ "version": "3.10.14"
98
+ }
99
+ },
100
+ "nbformat": 4,
101
+ "nbformat_minor": 2
102
+ }