Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.gitattributes +1 -0
README.md +204 -3
adapter_config.json +46 -0
adapter_model.safetensors +3 -0
chat_template.jinja +117 -0
tokenizer.json +3 -0
tokenizer_config.json +29 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,204 @@
----
-license: apache-2.0
----

+---
+language:
+- en
+license: apache-2.0
+library_name: peft
+base_model: Qwen/Qwen3-Coder-30B-A3B-Instruct
+tags:
+- motoko
+- internet-computer
+- icp
+- code-generation
+- blockchain
+- defi
+- lora
+- menese-protocol
+datasets: []
+pipeline_tag: text-generation
+---
+# MotokoCoderV0
+**The first code generation model for Motoko** — the native language of the [Internet Computer](https://internetcomputer.org/) blockchain.
+Part of the **Motoko Coder** model series by [Menese Protocol](https://meneseprotocol.io). Smaller and larger models are planned for production use, along with a hosted API for developers to try (see the series table below). This V0 release uses Qwen3-Coder-30B-A3B as the base — a commercially licensable model you can run and deploy freely.
+## Highlights
+- **70% compilation rate** on a balanced evaluation set of 20 diverse Motoko programming tasks
+- Generates production-quality `persistent actor` code with proper `mo:core` imports
+- Writes compilable **AMM swap pools**, **escrow services**, **token ledgers**, **staking contracts**, **admin access control**, and more
+- LoRA adapter (~214 MB) on top of Qwen3-Coder-30B-A3B-Instruct
+- Verified against the official `moc` compiler from DFINITY SDK
+## Motoko Coder Series
+| Model | Base | Status | Use Case |
+|-------|------|--------|----------|
+| **MotokoCoderV0** | Qwen3-Coder-30B-A3B | ✅ Released | Local development, commercial use |
+| MotokoCoderV1 | TBD | 🔜 Coming soon | Higher accuracy, self-repair |
+| MotokoCoder-API | Hosted | 🔜 Coming soon | API access for all developers |
+| MotokoCoder-Small | Smaller base | 🔜 Planned | Edge deployment, IDE plugins |
+| MotokoCoder-Pro | Larger base | 🔜 Planned | Production code generation |
+## Evaluation Results
+Tested against the `moc` compiler — a result is counted as "compiled" only when the generated code was actually accepted by `moc`.
+| Category | Compiled | Rate |
+|----------|----------|------|
+| Easy (contact forms, todo lists, profiles) | 4/7 | 57% |
+| Medium (voting, ledgers, config stores, event logs) | 6/8 | 75% |
+| Hard (AMM pools, staking, escrow, batch transfers) | 4/5 | 80% |
+| **Overall** | **14/20** | **70%** |
+### What it compiles
+- Persistent actors with `Map`, `Set`, `Principal`, `Time` state management
+- CRUD operations with proper `Map.add`/`Map.get`/`Map.delete` and compare functions
+- DeFi primitives: constant product AMM formula, fee collection, reserve tracking
+- State machines with variant types (`#Created`, `#Funded`, `#Released`)
+- Admin access control with `Principal.equal` checks
+- Record updates with `{ record with field = newValue }` syntax
+- Result types with `#ok`/`#err` error handling
+- Query vs update function separation
+- Token ledgers with transfer, mint, burn operations
+- Escrow services with full lifecycle management
+- Online stores (bookstore, restaurant menus) with inventory management
+### Example: AMM Swap Pool (compiles ✅)
+```motoko
+import Map "mo:core/Map";
+import Nat "mo:core/Nat";
+import Principal "mo:core/Principal";
+import Result "mo:core/Result";
+persistent actor AMMSwapPool { // Two-reserve pool: swap() prices a trade from the current reserves and records a 0.3% fee.
+  var reserveA : Nat = 1_000_000;
+  var reserveB : Nat = 1_000_000;
+  var totalFees : Nat = 0; // running sum of the fee computed in swap()
+  func getOutputAmount(inputAmount : Nat, inputReserve : Nat, outputReserve : Nat) : Nat { // output amount for a given input; only 997/1000 of the input counts toward the price
+    let numerator = inputAmount * outputReserve * 997;
+    let denominator = (inputReserve * 1000) + (inputAmount * 997);
+    numerator / denominator; // integer (Nat) division, so the result is rounded down
+  };
+  public shared(msg) func swap(inputToken : Text, inputAmount : Nat) : async Result.Result<Nat, Text> { // NOTE(review): inputToken and msg are never read in this body
+    if (inputAmount == 0) { return #err("Amount must be > 0") };
+    let outputAmount = getOutputAmount(inputAmount, reserveA, reserveB); // always priced as A in, B out, whatever inputToken says
+    let fee = inputAmount * 3 / 1000; // 0.3% of the input, tracked for reporting only
+    totalFees += fee;
+    reserveA += inputAmount; // the full input (fee included) is added to reserve A
+    reserveB -= outputAmount;
+    #ok(outputAmount);
+  };
+  public query func getReserves() : async { reserveA : Nat; reserveB : Nat } { // read-only snapshot of both reserves
+    { reserveA; reserveB };
+  };
+};
+```
+### Example: Escrow Service (compiles ✅, 156 lines — abridged excerpt below)
+```motoko
+persistent actor EscrowService { // Abridged excerpt: imports are not shown and every function body is elided as { ... }.
+  public type EscrowState = {
+    #Created; #Funded; #Disputed; #Released; #Refunded;
+  };
+  public type Escrow = {
+    id : Nat; buyer : Principal; seller : Principal;
+    amount : Nat; state : EscrowState; createdAt : Int;
+  };
+  var escrows = Map.empty<Nat, Escrow>(); // escrow records keyed by Nat (presumably Escrow.id; the bodies that use it are not shown)
+  public shared(msg) func createEscrow(seller : Principal, amount : Nat) : async Result.Result<Nat, Text> { ... };
+  public shared(msg) func fundEscrow(id : Nat) : async Result.Result<(), Text> { ... };
+  public shared(msg) func releaseFunds(id : Nat) : async Result.Result<(), Text> { ... };
+  public shared(msg) func dispute(id : Nat) : async Result.Result<(), Text> { ... };
+};
+```
+## Usage
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import torch
+base_model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
+adapter = "ky00040/MotokoCoderV0"
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
+model = PeftModel.from_pretrained(model, adapter)
+model = model.merge_and_unload()
+messages = [
+    {"role": "system", "content": "You are a Motoko expert for the Internet Computer. Write clean, compilable Motoko code using mo:core imports. Use `persistent actor` for actors, Map.empty/add/get with compare functions."},
+    {"role": "user", "content": "Write a Motoko persistent actor for a token balance ledger with transfer, mint, and balance query."}
+]
+text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer(text, return_tensors="pt").to(model.device)
+with torch.no_grad():
+    outputs = model.generate(**inputs, max_new_tokens=2048, temperature=0.1, do_sample=True, top_p=0.95)
+response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+print(response)
+```
+## System Prompt
+For best results, use this system prompt:
+```
+You are a Motoko expert for the Internet Computer. Write clean, compilable Motoko code using mo:core imports. Use `persistent actor` for actors, Map.empty/add/get with compare functions.
+```
+## Tips for Best Results
+1. **Ask for full actors**: "Write a Motoko persistent actor for X" works better than "Write a function that does X"
+2. **Describe the types**: "Store items with name, price, and category" helps the model define proper types
+3. **Mention state**: "Use Map for storage" guides the model toward correct patterns
+4. **Temperature 0.1** for reliable code, **0.7** for creative variations
+## Known Limitations
+- Standalone function prompts without context may reference undefined types
+- Very long actors (200+ lines) may occasionally truncate
+- String manipulation and regex-style operations are weak
+- HTTP outcall and inter-canister call patterns are limited
+- Sometimes uses OOP-style method calls (`.toArray()`) instead of module functions (`Iter.toArray()`)
+## Model Details
+- **Base model**: Qwen3-Coder-30B-A3B-Instruct (MoE architecture, 30B total parameters, 3B active per forward pass)
+- **Adapter type**: LoRA with rsLoRA scaling
+- **Adapter config**: r=64, alpha=128
+- **Target modules**: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
+- **Trainable parameters**: 53.5M (0.17% of total)
+- **Compilation verification**: All evaluation results verified against `moc` (Motoko compiler) from DFINITY SDK v0.31.0
+## About Motoko
+[Motoko](https://internetcomputer.org/docs/motoko/main/getting-started/motoko-introduction) is a programming language designed specifically for the Internet Computer blockchain. Key features include:
+- **Persistent actors** — canister smart contracts with automatic state persistence
+- **Async/await** — native support for inter-canister communication
+- **Strong type system** — derived from OCaml, with variants, options, and generics
+- **mo:core standard library** — Map, Set, List, Array, Principal, Time, and more
+MotokoCoderV0 uses the modern `mo:core` standard library (not the deprecated `mo:base`).
+## About Menese Protocol
+[Menese Protocol](https://meneseprotocol.io) builds cross-chain DeFi infrastructure on the Internet Computer. MotokoCoderV0 was developed jointly by **Mercatura Forum AI Lab** and **ICP Hub Egypt** as part of our mission to improve developer tooling for the ICP ecosystem.
+## License
+Apache 2.0 — free for commercial use.

adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "down_proj",
+    "o_proj",
+    "up_proj",
+    "k_proj",
+    "gate_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": true
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e69ab548752ecbcb8820ddc9bb2759a95d09246f6d58f9b65ae8e942d30f2c
+size 213961576

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,117 @@

+{#- render_extra_keys(json_dict, handled_keys)
+    Emits one "\n<key>value</key>" element for every key of json_dict that is not
+    listed in handled_keys. Mapping values and non-string sequences are serialized
+    with tojson; every other value is stringified. Emits nothing when json_dict is
+    not a mapping. The macro tags are left-trimmed so the macro returns no stray
+    newline when the renderer does not enable trim_blocks. -#}
+{%- macro render_extra_keys(json_dict, handled_keys) %}
+    {%- if json_dict is mapping %}
+        {%- for json_key in json_dict if json_key not in handled_keys %}
+            {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
+                {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
+            {%- else %}
+                {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
+            {%- endif %}
+        {%- endfor %}
+    {%- endif %}
+{%- endmacro %}
+{%- if messages[0]["role"] == "system" %}{#- a leading system message is pulled out of the conversation and rendered separately below -#}
+    {%- set system_message = messages[0]["content"] %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{%- if not tools is defined %}{#- default to an empty list so the length checks on tools always work -#}
+    {%- set tools = [] %}
+{%- endif %}
+{%- if system_message is defined %}
+    {{- "<|im_start|>system\n" + system_message }}{#- the system turn is opened here and closed only after the optional tools section -#}
+{%- else %}
+    {%- if tools is iterable and tools | length > 0 %}{#- no system message given: open a system turn with a default prompt, but only when tools are supplied -#}
+        {{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
+    {%- endif %}
+{%- endif %}
+{%- if tools is iterable and tools | length > 0 %}
+    {#- Tools preamble: appended to the system turn opened above. Each tool becomes one
+        function element with its name, optional description and parameters, followed
+        by fixed instructions describing the expected tool-call format. -#}
+    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "<tools>" }}
+    {%- for tool in tools %}
+        {#- Accept both the wrapped form (tool.function) and a bare function description. -#}
+        {%- if tool.function is defined %}
+            {%- set tool = tool.function %}
+        {%- endif %}
+        {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
+        {%- if tool.description is defined %}
+            {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
+        {%- endif %}
+        {{- '\n<parameters>' }}
+        {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
+            {%- for param_name, param_fields in tool.parameters.properties|items %}
+                {{- '\n<parameter>' }}
+                {{- '\n<name>' ~ param_name ~ '</name>' }}
+                {%- if param_fields.type is defined %}
+                    {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
+                {%- endif %}
+                {%- if param_fields.description is defined %}
+                    {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
+                {%- endif %}
+                {#- Any remaining per-parameter fields are rendered generically. -#}
+                {%- set handled_keys = ['name', 'type', 'description'] %}
+                {{- render_extra_keys(param_fields, handled_keys) }}
+                {{- '\n</parameter>' }}
+            {%- endfor %}
+        {%- endif %}
+        {#- Left-trimmed like the other set tags in this template, so no newline or
+            indentation leaks into the prompt when the renderer does not enable
+            trim_blocks / lstrip_blocks. -#}
+        {%- set handled_keys = ['type', 'properties'] %}
+        {{- render_extra_keys(tool.parameters, handled_keys) }}
+        {{- '\n</parameters>' }}
+        {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
+        {{- render_extra_keys(tool, handled_keys) }}
+        {{- '\n</function>' }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+{%- endif %}
+{%- if system_message is defined %}{#- close the system turn under the same conditions that opened it: a system message was given, or tools were supplied -#}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if tools is iterable and tools | length > 0 %}
+        {{- '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in loop_messages %}{#- render every remaining message as one chat turn -#}
+    {%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}{#- assistant turn that carries tool calls -#}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}{#- optional free text is emitted before the calls -#}
+            {{- '\n' + message.content | trim + '\n' }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}{#- accept both the wrapped form (tool_call.function) and a bare call object -#}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+            {%- if tool_call.arguments is defined %}
+                {%- for args_name, args_value in tool_call.arguments|items %}{#- NOTE(review): the items filter presumably requires arguments to be a mapping, not a JSON string; confirm against callers -#}
+                    {{- '<parameter=' + args_name + '>\n' }}
+                    {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}{#- mappings and non-string sequences are serialized as JSON, everything else is stringified -#}
+                    {{- args_value }}
+                    {{- '\n</parameter>\n' }}
+                {%- endfor %}
+            {%- endif %}
+            {{- '</function>\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}{#- plain text turn -#}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "tool" %}{#- tool results are wrapped in a user turn; consecutive results share one turn -#}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}{#- open the turn only for the first result of a run; NOTE(review): nothing is opened when there is no previous item -#}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>\n' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}{#- close the turn when the next message is not a tool result, or when this is the last message -#}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}{#- any other role is rendered as a plain turn under its own role name -#}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}{#- when requested, end the prompt with an open assistant turn for the model to complete -#}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bae3e39d56cfdb7b650cb318344d5c0f071d19fc9868ce086fef0cee78d5e7ff
+size 11422749

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": false,
+  "model_max_length": 1048576,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}