Felix1314 committed on
Commit
c122b61
·
1 Parent(s): 3fad3b4

initial commit

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
5
+ <Languages>
6
+ <language minSize="533" name="Python" />
7
+ </Languages>
8
+ </inspection_tool>
9
+ </profile>
10
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/llm_learn.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="pytorch" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="pytorch" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="pytorch" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/llm_learn.iml" filepath="$PROJECT_DIR$/.idea/llm_learn.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$/llm_learn" vcs="Git" />
5
+ </component>
6
+ </project>
demo.ipynb ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "jupyter": {
9
+ "is_executing": true
10
+ }
11
+ },
12
+ "source": [
13
+ "#第2章/加载编码工具\n",
14
+ "from transformers import BertTokenizer\n",
15
+ "tokenizer = BertTokenizer.from_pretrained(\n",
16
+ "pretrained_model_name_or_path='bert-base-chinese',\n",
17
+ "cache_dir=None,\n",
18
+ "force_download=False,\n",
19
+ ")\n",
20
+ "\n",
21
+ "#第2章/准备实验数据\n",
22
+ "sents = [\n",
23
+ "'你站在桥上看风景',\n",
24
+ "'看风景的人在楼上看你',\n",
25
+ "'明月装饰了你的窗子',\n",
26
+ "'你装饰了别人的梦',\n",
27
+ "]\n",
28
+ "\n",
29
+ "#第2章/基本的编码函数\n",
30
+ "out = tokenizer.encode(\n",
31
+ "text=sents[0],\n",
32
+ "text_pair=sents[1],\n",
33
+ "#当句子长度大于max_length时截断\n",
34
+ "truncation=True,\n",
35
+ "#一律补PAD,直到max_length长度\n",
36
+ "padding='max_length',\n",
37
+ "add_special_tokens=True,\n",
38
+ "max_length=25,\n",
39
+ "return_tensors=None,\n",
40
+ ")\n",
41
+ "print(out)\n",
42
+ "print(tokenizer.decode(out))"
43
+ ],
44
+ "outputs": [],
45
+ "execution_count": null
46
+ },
47
+ {
48
+ "metadata": {},
49
+ "cell_type": "code",
50
+ "outputs": [],
51
+ "execution_count": null,
52
+ "source": "",
53
+ "id": "1c21ec1248b8a72a"
54
+ }
55
+ ],
56
+ "metadata": {
57
+ "kernelspec": {
58
+ "display_name": "Python 3",
59
+ "language": "python",
60
+ "name": "python3"
61
+ },
62
+ "language_info": {
63
+ "codemirror_mode": {
64
+ "name": "ipython",
65
+ "version": 2
66
+ },
67
+ "file_extension": ".py",
68
+ "mimetype": "text/x-python",
69
+ "name": "python",
70
+ "nbconvert_exporter": "python",
71
+ "pygments_lexer": "ipython2",
72
+ "version": "2.7.6"
73
+ }
74
+ },
75
+ "nbformat": 4,
76
+ "nbformat_minor": 5
77
+ }
demo.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Chapter 3 / Load the dataset
2
+ from datasets import load_dataset
3
+ dataset = load_dataset(path='seamew/ChnSentiCorp')
4
+ dataset