initial commit

Browse files

Files changed (9) hide show

.idea/.gitignore +8 -0
.idea/inspectionProfiles/Project_Default.xml +10 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/llm_learn.iml +10 -0
.idea/misc.xml +7 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
demo.ipynb +77 -0
demo.py +4 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,10 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <Languages>
+        <language minSize="533" name="Python" />
+      </Languages>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/llm_learn.iml ADDED Viewed

	@@ -0,0 +1,10 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="pytorch" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,7 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="pytorch" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="pytorch" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/llm_learn.iml" filepath="$PROJECT_DIR$/.idea/llm_learn.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/llm_learn" vcs="Git" />
+  </component>
+</project>

demo.ipynb ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "is_executing": true
+    }
+   },
+   "source": [
+    "#第2章/加载编码工具\n",
+    "from transformers import BertTokenizer\n",
+    "tokenizer = BertTokenizer.from_pretrained(\n",
+    "pretrained_model_name_or_path='bert-base-chinese',\n",
+    "cache_dir=None,\n",
+    "force_download=False,\n",
+    ")\n",
+    "\n",
+    "#第2章/准备实验数据\n",
+    "sents = [\n",
+    "'你站在桥上看风景',\n",
+    "'看风景的人在楼上看你',\n",
+    "'明月装饰了你的窗子',\n",
+    "'你装饰了别人的梦',\n",
+    "]\n",
+    "\n",
+    "#第2章/基本的编码函数\n",
+    "out = tokenizer.encode(\n",
+    "text=sents[0],\n",
+    "text_pair=sents[1],\n",
+    "#当句子长度大于max_length时截断\n",
+    "truncation=True,\n",
+    "#一律补PAD，直到max_length长度\n",
+    "padding='max_length',\n",
+    "add_special_tokens=True,\n",
+    "max_length=25,\n",
+    "return_tensors=None,\n",
+    ")\n",
+    "print(out)\n",
+    "print(tokenizer.decode(out))"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "1c21ec1248b8a72a"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

demo.py ADDED Viewed

	@@ -0,0 +1,4 @@

+#第3章/加载数据集
+from datasets import load_dataset
+dataset = load_dataset(path='seamew/ChnSentiCorp')
+dataset