ky00040 committed on
Commit
2e8b735
·
verified ·
1 Parent(s): 22e7ca4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,204 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ library_name: peft
6
+ base_model: Qwen/Qwen3-Coder-30B-A3B-Instruct
7
+ tags:
8
+ - motoko
9
+ - internet-computer
10
+ - icp
11
+ - code-generation
12
+ - blockchain
13
+ - defi
14
+ - lora
15
+ - menese-protocol
16
+ datasets: []
17
+ pipeline_tag: text-generation
18
+ ---
19
+
20
+ # MotokoCoderV0
21
+
22
+ **The first code generation model for Motoko** — the native language of the [Internet Computer](https://internetcomputer.org/) blockchain.
23
+
24
+ Part of the **Motoko Coder** model series by [Menese Protocol](https://meneseprotocol.io). Smaller and larger models are planned for production use, along with a hosted API for developers to try (see the series table below). This V0 release uses Qwen3-Coder-30B-A3B as the base — a commercially licensable model you can run and deploy freely.
25
+
26
+ ## Highlights
27
+
28
+ - **70% compilation rate** on a balanced evaluation set of 20 diverse Motoko programming tasks
29
+ - Generates production-quality `persistent actor` code with proper `mo:core` imports
30
+ - Writes compilable **AMM swap pools**, **escrow services**, **token ledgers**, **staking contracts**, **admin access control**, and more
31
+ - LoRA adapter (~214 MB) on top of Qwen3-Coder-30B-A3B-Instruct
32
+ - Verified against the official `moc` compiler from DFINITY SDK
33
+
34
+ ## Motoko Coder Series
35
+
36
+ | Model | Base | Status | Use Case |
37
+ |-------|------|--------|----------|
38
+ | **MotokoCoderV0** | Qwen3-Coder-30B-A3B | ✅ Released | Local development, commercial use |
39
+ | MotokoCoderV1 | TBD | 🔜 Coming soon | Higher accuracy, self-repair |
40
+ | MotokoCoder-API | Hosted | 🔜 Coming soon | API access for all developers |
41
+ | MotokoCoder-Small | Smaller base | 🔜 Planned | Edge deployment, IDE plugins |
42
+ | MotokoCoder-Pro | Larger base | 🔜 Planned | Production code generation |
43
+
44
+ ## Evaluation Results
45
+
46
+ Every generated program was compiled with the `moc` compiler — a result counts as "compiled" only if `moc` accepted the code.
47
+
48
+ | Category | Compiled | Rate |
49
+ |----------|----------|------|
50
+ | Easy (contact forms, todo lists, profiles) | 4/7 | 57% |
51
+ | Medium (voting, ledgers, config stores, event logs) | 6/8 | 75% |
52
+ | Hard (AMM pools, staking, escrow, batch transfers) | 4/5 | 80% |
53
+ | **Overall** | **14/20** | **70%** |
54
+
55
+ ### What it compiles
56
+
57
+ - Persistent actors with `Map`, `Set`, `Principal`, `Time` state management
58
+ - CRUD operations with proper `Map.add`/`Map.get`/`Map.delete` and compare functions
59
+ - DeFi primitives: constant product AMM formula, fee collection, reserve tracking
60
+ - State machines with variant types (`#Created`, `#Funded`, `#Released`)
61
+ - Admin access control with `Principal.equal` checks
62
+ - Record updates with `{ record with field = newValue }` syntax
63
+ - Result types with `#ok`/`#err` error handling
64
+ - Query vs update function separation
65
+ - Token ledgers with transfer, mint, burn operations
66
+ - Escrow services with full lifecycle management
67
+ - Online stores (bookstore, restaurant menus) with inventory management
68
+
69
+ ### Example: AMM Swap Pool (compiles ✅)
70
+
71
+ ```motoko
72
+ import Map "mo:core/Map";
73
+ import Nat "mo:core/Nat";
74
+ import Principal "mo:core/Principal";
75
+ import Result "mo:core/Result";
76
+
77
+ persistent actor AMMSwapPool {
78
+ var reserveA : Nat = 1_000_000;
79
+ var reserveB : Nat = 1_000_000;
80
+ var totalFees : Nat = 0;
81
+
82
+ func getOutputAmount(inputAmount : Nat, inputReserve : Nat, outputReserve : Nat) : Nat {
83
+ let numerator = inputAmount * outputReserve * 997;
84
+ let denominator = (inputReserve * 1000) + (inputAmount * 997);
85
+ numerator / denominator;
86
+ };
87
+
88
+ public shared(msg) func swap(inputToken : Text, inputAmount : Nat) : async Result.Result<Nat, Text> {
89
+ if (inputAmount == 0) { return #err("Amount must be > 0") };
90
+ let outputAmount = getOutputAmount(inputAmount, reserveA, reserveB);
91
+ let fee = inputAmount * 3 / 1000;
92
+ totalFees += fee;
93
+ reserveA += inputAmount;
94
+ reserveB -= outputAmount;
95
+ #ok(outputAmount);
96
+ };
97
+
98
+ public query func getReserves() : async { reserveA : Nat; reserveB : Nat } {
99
+ { reserveA; reserveB };
100
+ };
101
+ };
102
+ ```
103
+
104
+ ### Example: Escrow Service (compiles ✅, 156 lines; abridged excerpt — imports and function bodies elided)
105
+
106
+ ```motoko
107
+ persistent actor EscrowService {
108
+ public type EscrowState = {
109
+ #Created; #Funded; #Disputed; #Released; #Refunded;
110
+ };
111
+
112
+ public type Escrow = {
113
+ id : Nat; buyer : Principal; seller : Principal;
114
+ amount : Nat; state : EscrowState; createdAt : Int;
115
+ };
116
+
117
+ var escrows = Map.empty<Nat, Escrow>();
118
+
119
+ public shared(msg) func createEscrow(seller : Principal, amount : Nat) : async Result.Result<Nat, Text> { ... };
120
+ public shared(msg) func fundEscrow(id : Nat) : async Result.Result<(), Text> { ... };
121
+ public shared(msg) func releaseFunds(id : Nat) : async Result.Result<(), Text> { ... };
122
+ public shared(msg) func dispute(id : Nat) : async Result.Result<(), Text> { ... };
123
+ };
124
+ ```
125
+
126
+ ## Usage
127
+
128
+ ```python
129
+ from transformers import AutoModelForCausalLM, AutoTokenizer
130
+ from peft import PeftModel
131
+ import torch
132
+
133
+ base_model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
134
+ adapter = "ky00040/MotokoCoderV0"
135
+
136
+ tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
137
+ model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
138
+ model = PeftModel.from_pretrained(model, adapter)
139
+ model = model.merge_and_unload()
140
+
141
+ messages = [
142
+ {"role": "system", "content": "You are a Motoko expert for the Internet Computer. Write clean, compilable Motoko code using mo:core imports. Use `persistent actor` for actors, Map.empty/add/get with compare functions."},
143
+ {"role": "user", "content": "Write a Motoko persistent actor for a token balance ledger with transfer, mint, and balance query."}
144
+ ]
145
+
146
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
147
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
148
+
149
+ with torch.no_grad():
150
+ outputs = model.generate(**inputs, max_new_tokens=2048, temperature=0.1, do_sample=True, top_p=0.95)
151
+
152
+ response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
153
+ print(response)
154
+ ```
155
+
156
+ ## System Prompt
157
+
158
+ For best results, use this system prompt:
159
+
160
+ ```
161
+ You are a Motoko expert for the Internet Computer. Write clean, compilable Motoko code using mo:core imports. Use `persistent actor` for actors, Map.empty/add/get with compare functions.
162
+ ```
163
+
164
+ ## Tips for Best Results
165
+
166
+ 1. **Ask for full actors**: "Write a Motoko persistent actor for X" works better than "Write a function that does X"
167
+ 2. **Describe the types**: "Store items with name, price, and category" helps the model define proper types
168
+ 3. **Mention state**: "Use Map for storage" guides the model toward correct patterns
169
+ 4. **Temperature 0.1** for reliable code, **0.7** for creative variations
170
+
171
+ ## Known Limitations
172
+
173
+ - Standalone function prompts without context may reference undefined types
174
+ - Very long actors (200+ lines) may occasionally truncate
175
+ - String manipulation and regex-style operations are weak
176
+ - HTTP outcall and inter-canister call patterns are limited
177
+ - Sometimes uses OOP-style method calls (`.toArray()`) instead of module functions (`Iter.toArray()`)
178
+
179
+ ## Model Details
180
+
181
+ - **Base model**: Qwen3-Coder-30B-A3B-Instruct (MoE architecture, 30B total parameters, about 3B active per token)
182
+ - **Adapter type**: LoRA with rsLoRA scaling
183
+ - **Adapter config**: r=64, alpha=128
184
+ - **Target modules**: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
185
+ - **Trainable parameters**: 53.5M (0.17% of total)
186
+ - **Compilation verification**: All evaluation results verified against `moc` (Motoko compiler) from DFINITY SDK v0.31.0
187
+
188
+ ## About Motoko
189
+
190
+ [Motoko](https://internetcomputer.org/docs/motoko/main/getting-started/motoko-introduction) is a programming language designed specifically for the Internet Computer blockchain. Key features include:
191
+ - **Persistent actors** — canister smart contracts with automatic state persistence
192
+ - **Async/await** — native support for inter-canister communication
193
+ - **Strong type system** — derived from OCaml, with variants, options, and generics
194
+ - **mo:core standard library** — Map, Set, List, Array, Principal, Time, and more
195
+
196
+ MotokoCoderV0 uses the modern `mo:core` standard library (not the deprecated `mo:base`).
197
+
198
+ ## About Menese Protocol
199
+
200
+ [Menese Protocol](https://meneseprotocol.io) builds cross-chain DeFi infrastructure on the Internet Computer. MotokoCoderV0 was developed jointly by **Mercatura Forum AI Lab** and **ICP Hub Egypt** as part of our mission to improve developer tooling for the ICP ecosystem.
201
+
202
+ ## License
203
+
204
+ Apache 2.0 — free for commercial use.
adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 128,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj",
34
+ "down_proj",
35
+ "o_proj",
36
+ "up_proj",
37
+ "k_proj",
38
+ "gate_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": true
46
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e69ab548752ecbcb8820ddc9bb2759a95d09246f6d58f9b65ae8e942d30f2c
3
+ size 213961576
chat_template.jinja ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% macro render_extra_keys(json_dict, handled_keys) %}
2
+ {%- if json_dict is mapping %}
3
+ {%- for json_key in json_dict if json_key not in handled_keys %}
4
+ {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
5
+ {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
6
+ {%- else %}
7
+ {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
8
+ {%- endif %}
9
+ {%- endfor %}
10
+ {%- endif %}
11
+ {% endmacro %}
12
+
13
+ {%- if messages[0]["role"] == "system" %}
14
+ {%- set system_message = messages[0]["content"] %}
15
+ {%- set loop_messages = messages[1:] %}
16
+ {%- else %}
17
+ {%- set loop_messages = messages %}
18
+ {%- endif %}
19
+
20
+ {%- if not tools is defined %}
21
+ {%- set tools = [] %}
22
+ {%- endif %}
23
+
24
+ {%- if system_message is defined %}
25
+ {{- "<|im_start|>system\n" + system_message }}
26
+ {%- else %}
27
+ {%- if tools is iterable and tools | length > 0 %}
28
+ {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
29
+ {%- endif %}
30
+ {%- endif %}
31
+ {%- if tools is iterable and tools | length > 0 %}
32
+ {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
33
+ {{- "<tools>" }}
34
+ {%- for tool in tools %}
35
+ {%- if tool.function is defined %}
36
+ {%- set tool = tool.function %}
37
+ {%- endif %}
38
+ {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
39
+ {%- if tool.description is defined %}
40
+ {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
41
+ {%- endif %}
42
+ {{- '\n<parameters>' }}
43
+ {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
44
+ {%- for param_name, param_fields in tool.parameters.properties|items %}
45
+ {{- '\n<parameter>' }}
46
+ {{- '\n<name>' ~ param_name ~ '</name>' }}
47
+ {%- if param_fields.type is defined %}
48
+ {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
49
+ {%- endif %}
50
+ {%- if param_fields.description is defined %}
51
+ {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
52
+ {%- endif %}
53
+ {%- set handled_keys = ['name', 'type', 'description'] %}
54
+ {{- render_extra_keys(param_fields, handled_keys) }}
55
+ {{- '\n</parameter>' }}
56
+ {%- endfor %}
57
+ {%- endif %}
58
+ {% set handled_keys = ['type', 'properties'] %}
59
+ {{- render_extra_keys(tool.parameters, handled_keys) }}
60
+ {{- '\n</parameters>' }}
61
+ {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
62
+ {{- render_extra_keys(tool, handled_keys) }}
63
+ {{- '\n</function>' }}
64
+ {%- endfor %}
65
+ {{- "\n</tools>" }}
66
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
67
+ {%- endif %}
68
+ {%- if system_message is defined %}
69
+ {{- '<|im_end|>\n' }}
70
+ {%- else %}
71
+ {%- if tools is iterable and tools | length > 0 %}
72
+ {{- '<|im_end|>\n' }}
73
+ {%- endif %}
74
+ {%- endif %}
75
+ {%- for message in loop_messages %}
76
+ {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
77
+ {{- '<|im_start|>' + message.role }}
78
+ {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
79
+ {{- '\n' + message.content | trim + '\n' }}
80
+ {%- endif %}
81
+ {%- for tool_call in message.tool_calls %}
82
+ {%- if tool_call.function is defined %}
83
+ {%- set tool_call = tool_call.function %}
84
+ {%- endif %}
85
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
86
+ {%- if tool_call.arguments is defined %}
87
+ {%- for args_name, args_value in tool_call.arguments|items %}
88
+ {{- '<parameter=' + args_name + '>\n' }}
89
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
90
+ {{- args_value }}
91
+ {{- '\n</parameter>\n' }}
92
+ {%- endfor %}
93
+ {%- endif %}
94
+ {{- '</function>\n</tool_call>' }}
95
+ {%- endfor %}
96
+ {{- '<|im_end|>\n' }}
97
+ {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
98
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
99
+ {%- elif message.role == "tool" %}
100
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
101
+ {{- '<|im_start|>user\n' }}
102
+ {%- endif %}
103
+ {{- '<tool_response>\n' }}
104
+ {{- message.content }}
105
+ {{- '\n</tool_response>\n' }}
106
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
107
+ {{- '<|im_end|>\n' }}
108
+ {%- elif loop.last %}
109
+ {{- '<|im_end|>\n' }}
110
+ {%- endif %}
111
+ {%- else %}
112
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
113
+ {%- endif %}
114
+ {%- endfor %}
115
+ {%- if add_generation_prompt %}
116
+ {{- '<|im_start|>assistant\n' }}
117
+ {%- endif %}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae3e39d56cfdb7b650cb318344d5c0f071d19fc9868ce086fef0cee78d5e7ff
3
+ size 11422749
tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "model_max_length": 1048576,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }