root1091
/

PaliGemma-testing

Model card Files Files and versions

xet

Community

root1091 committed on May 21, 2024

Commit

6a6df5e

1 Parent(s): 470cacb

testing Pali Gemma for the first time

Browse files

Files changed (1) hide show

paligemma_testing.ipynb +102 -0

paligemma_testing.ipynb ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "77414e9d91534e578d51cced47102e57",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You're using a GemmaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "TAX INVOICE\n",
+      "Bill No. 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import AutoProcessor, PaliGemmaForConditionalGeneration\n",
+    "import requests\n",
+    "from PIL import Image\n",
+    "\n",
+    "# Load the PaliGemma checkpoint and the processor that belongs to it.\n",
+    "model_id = \"google/paligemma-3b-mix-224\"\n",
+    "model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)\n",
+    "processor = AutoProcessor.from_pretrained(model_id)\n",
+    "\n",
+    "# Task prefix and the local image to run it on.\n",
+    "prompt = \"ocr\"\n",
+    "image_file = \"sample_invoice.png\"\n",
+    "# Normalise to three channels so RGBA or palette PNGs are not passed through as-is.\n",
+    "raw_image = Image.open(image_file).convert(\"RGB\")\n",
+    "inputs = processor(prompt, raw_image, return_tensors=\"pt\")\n",
+    "output = model.generate(**inputs, max_new_tokens=100)\n",
+    "\n",
+    "# The generated sequence starts with the input tokens; drop them at the token level\n",
+    "# rather than slicing the decoded string by len(prompt), which leaves the newline\n",
+    "# separator behind and breaks if the decoded prefix ever differs from the prompt.\n",
+    "prompt_len = inputs[\"input_ids\"].shape[-1]\n",
+    "print(processor.decode(output[0][prompt_len:], skip_special_tokens=True))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GPU is not available\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# Report whether PyTorch can see a CUDA device in this kernel.\n",
+    "print(\"GPU is available\" if torch.cuda.is_available() else \"GPU is not available\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "gemini_gemma",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}