Spaces:
Running
Running
Taylor committed on
Commit ·
c92238b
1
Parent(s): a06fe42
feat: PyTorch vs Aether side-by-side inference
Browse files
Left column: base SmolLM2-360M on PyTorch CPU (the standard)
Right column: Buleyean SmolLM2-360M on Aether (our engine)
Aether runs the entire inference pipeline in pure TypeScript + JS:
GGUF parse -> dequant -> matVec -> RoPE -> SwiGLU -> sampling
Zero external ML dependencies. Shows timing for both.
Docker SDK with Python 3.11 + Node.js 20.
- Dockerfile +23 -0
- README.md +3 -5
- aether-server.mjs +627 -0
- app.py +124 -57
- requirements.txt +1 -1
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dual-runtime image: Python 3.11 runs the Gradio frontend + PyTorch baseline
# (app.py); Node.js 20 runs the Aether inference server (aether-server.mjs).
FROM python:3.11-slim

# Install Node.js 20
# NOTE(review): piping a remote setup script into bash trusts nodesource.com —
# acceptable for a demo Space, but pin/verify for anything production-facing.
RUN apt-get update && apt-get install -y curl && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Python deps (CPU-only torch from pre-built wheels)
COPY requirements.txt .
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt

# App files
COPY app.py aether-server.mjs ./

# Create cache dir (model + tokenizer downloads land here at runtime)
RUN mkdir -p /tmp/hf_cache

# 7860 is the HF Spaces default app port (matches app_port in README.md)
EXPOSE 7860

# app.py is the entry point; it is expected to spawn the Node server itself
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -3,12 +3,10 @@ title: The Void - Buleyean RL Demo
|
|
| 3 |
emoji: "\U0001F573\uFE0F"
|
| 4 |
colorFrom: gray
|
| 5 |
colorTo: indigo
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
python_version: "3.11"
|
| 9 |
-
app_file: app.py
|
| 10 |
pinned: true
|
| 11 |
models:
|
| 12 |
-
-
|
| 13 |
- forkjoin-ai/buleyean-smollm2-360m
|
| 14 |
---
|
|
|
|
| 3 |
emoji: "\U0001F573\uFE0F"
|
| 4 |
colorFrom: gray
|
| 5 |
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
|
|
|
|
|
|
| 8 |
pinned: true
|
| 9 |
models:
|
| 10 |
+
- HuggingFaceTB/SmolLM2-360M-Instruct
|
| 11 |
- forkjoin-ai/buleyean-smollm2-360m
|
| 12 |
---
|
aether-server.mjs
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Aether Inference Server
|
| 3 |
+
*
|
| 4 |
+
* Standalone Node.js server running SmolLM2-360M inference
|
| 5 |
+
* using Aether's WASM-SIMD kernels. Zero external ML dependencies.
|
| 6 |
+
*
|
| 7 |
+
* The entire inference pipeline is pure TypeScript + WASM:
|
| 8 |
+
* GGUF parse → Q4_K dequant → WASM-SIMD matVec → RoPE → SwiGLU → sampling
|
| 9 |
+
*/
|
| 10 |
+
|
| 11 |
+
import { createServer } from 'http';
|
| 12 |
+
import { readFileSync, existsSync, writeFileSync } from 'fs';
|
| 13 |
+
import { execSync } from 'child_process';
|
| 14 |
+
import { fileURLToPath } from 'url';
|
| 15 |
+
import { dirname, join } from 'path';
|
| 16 |
+
|
| 17 |
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
| 18 |
+
const PORT = parseInt(process.env.AETHER_PORT || '7861');
|
| 19 |
+
|
| 20 |
+
// ─── Model Config (SmolLM2-360M-Instruct, LLaMA family) ────────────────────
|
| 21 |
+
const CONFIG = {
|
| 22 |
+
hiddenDim: 960,
|
| 23 |
+
numLayers: 32,
|
| 24 |
+
numHeads: 15,
|
| 25 |
+
numKvHeads: 5,
|
| 26 |
+
headDim: 64,
|
| 27 |
+
intermediateSize: 2560,
|
| 28 |
+
vocabSize: 49152,
|
| 29 |
+
maxSeqLength: 2048,
|
| 30 |
+
ropeTheta: 100000.0,
|
| 31 |
+
rmsNormEps: 1e-5,
|
| 32 |
+
eosToken: 2,
|
| 33 |
+
bosToken: 1,
|
| 34 |
+
};
|
| 35 |
+
|
| 36 |
+
// ─── Q8_0 Dequantization ────────────────────────────────────────────────────
|
| 37 |
+
// Q8_0: 34 bytes per block of 32 elements (fp16 scale + 32 int8 quants)
|
| 38 |
+
const Q8_0_BLOCK_SIZE = 32;
|
| 39 |
+
const Q8_0_BLOCK_BYTES = 34;
|
| 40 |
+
|
| 41 |
+
/**
 * Decode an IEEE-754 half-precision float from its two little-endian bytes.
 * NaN and Infinity are clamped to 0 (quantization scales should never hold
 * them, and 0 is a harmless neutral value).
 * @param {number} lo - low byte of the fp16 value
 * @param {number} hi - high byte of the fp16 value
 * @returns {number} the decoded value as a JS double
 */
function fp16ToF32(lo, hi) {
  const bits = (hi << 8) | lo;
  const sign = (bits & 0x8000) !== 0 ? -1 : 1;
  const exp = (bits >> 10) & 0x1f;
  const mant = bits & 0x3ff;
  if (exp === 0x1f) return 0; // clamp NaN/Inf
  if (exp === 0) {
    // Subnormal (or signed zero): no implicit leading 1, exponent fixed at -14.
    if (mant === 0) return sign < 0 ? -0 : 0;
    return sign * (mant / 1024) * Math.pow(2, -14);
  }
  // Normal number: implicit leading 1, biased exponent.
  return sign * Math.pow(2, exp - 15) * (1 + mant / 1024);
}
|
| 50 |
+
|
| 51 |
+
/**
 * Dequantize a ggml Q8_0 tensor.
 * Layout per 32-element block: fp16 scale (2 bytes) + 32 signed int8 quants.
 * @param {Uint8Array} data - raw quantized bytes
 * @param {number} numElements - number of output floats
 * @returns {Float32Array} dequantized values
 */
function dequantQ8_0(data, numElements) {
  const out = new Float32Array(numElements);
  const blocks = Math.ceil(numElements / Q8_0_BLOCK_SIZE);
  for (let blk = 0; blk < blocks; blk++) {
    const base = blk * Q8_0_BLOCK_BYTES;
    const scale = fp16ToF32(data[base], data[base + 1]);
    const start = blk * Q8_0_BLOCK_SIZE;
    // The final block may be partial when numElements is not a multiple of 32.
    const count = Math.min(Q8_0_BLOCK_SIZE, numElements - start);
    for (let i = 0; i < count; i++) {
      // Bytes arrive unsigned; re-center values above 127 into int8 range.
      const u = data[base + 2 + i];
      const signed = u > 127 ? u - 256 : u;
      out[start + i] = signed * scale;
    }
  }
  return out;
}
|
| 66 |
+
|
| 67 |
+
// ─── Q4_K Dequantization ────────────────────────────────────────────────────
|
| 68 |
+
const QK_K = 256;
|
| 69 |
+
const Q4K_BLOCK_BYTES = 144;
|
| 70 |
+
|
| 71 |
+
/**
 * Dequantize a ggml Q4_K tensor.
 *
 * Super-block layout (144 bytes per 256 elements):
 *   bytes 0-1    fp16 super-block scale d
 *   bytes 2-3    fp16 super-block min   dmin
 *   bytes 4-15   12 bytes packing eight 6-bit (scale, min) pairs
 *   bytes 16-143 128 bytes of 4-bit quants
 *
 * Quants are stored in four 32-byte chunks. Within chunk c, the LOW nibbles
 * hold elements [c*64, c*64+32) using sub-block 2c, and the HIGH nibbles hold
 * elements [c*64+32, c*64+64) using sub-block 2c+1 — NOT one element per
 * nibble in order. (The previous implementation interleaved nibbles per
 * element index and mis-decoded the 6-bit scales/mins for sub-blocks 4-7,
 * producing garbage weights; this follows ggml's get_scale_min_k4 /
 * dequantize_row_q4_K exactly.)
 *
 * @param {Uint8Array} data - raw quantized bytes
 * @param {number} numElements - number of output floats
 * @returns {Float32Array} dequantized values
 */
function dequantQ4K(data, numElements) {
  const out = new Float32Array(numElements);
  const numBlocks = Math.ceil(numElements / QK_K);

  // Unpack the j-th (0..7) 6-bit (scale, min) pair from the 12 packed bytes.
  function scaleMin(j, sc) {
    if (j < 4) {
      return [sc[j] & 63, sc[j + 4] & 63];
    }
    return [
      (sc[j + 4] & 0x0f) | ((sc[j - 4] >> 6) << 4),
      (sc[j + 4] >> 4) | ((sc[j] >> 6) << 4),
    ];
  }

  for (let b = 0; b < numBlocks; b++) {
    const off = b * Q4K_BLOCK_BYTES;
    const d = fp16ToF32(data[off], data[off + 1]);
    const dmin = fp16ToF32(data[off + 2], data[off + 3]);
    const scalesBytes = data.subarray(off + 4, off + 16);
    const qBytes = data.subarray(off + 16, off + 16 + 128);

    for (let chunk = 0; chunk < 4; chunk++) {
      const [s1, m1] = scaleMin(2 * chunk, scalesBytes);
      const [s2, m2] = scaleMin(2 * chunk + 1, scalesBytes);
      const d1 = d * s1;
      const min1 = dmin * m1;
      const d2 = d * s2;
      const min2 = dmin * m2;
      for (let l = 0; l < 32; l++) {
        const q = qBytes[chunk * 32 + l];
        const lowIdx = b * QK_K + chunk * 64 + l;   // low-nibble element
        const highIdx = lowIdx + 32;                 // high-nibble element
        // Guard the (possibly partial) final block.
        if (lowIdx < numElements) out[lowIdx] = d1 * (q & 0x0f) - min1;
        if (highIdx < numElements) out[highIdx] = d2 * (q >> 4) - min2;
      }
    }
  }
  return out;
}
|
| 104 |
+
|
| 105 |
+
// Detect quant type by byte count
|
| 106 |
+
// Detect quant type by byte count and dequantize accordingly.
// The 5% tolerance absorbs per-tensor padding in the byte-count heuristic.
function dequantAuto(data, numElements) {
  const expectedQ8 = Math.ceil(numElements / Q8_0_BLOCK_SIZE) * Q8_0_BLOCK_BYTES;
  const expectedQ4K = Math.ceil(numElements / QK_K) * Q4K_BLOCK_BYTES;
  const expectedF32 = numElements * 4;

  if (Math.abs(data.length - expectedF32) < expectedF32 * 0.05) {
    // Float32Array views require the byteOffset to be 4-byte aligned;
    // constructing one on an unaligned offset throws a RangeError. GGUF data
    // is normally aligned, but fall back to an aligned copy when it is not.
    if ((data.byteOffset & 3) === 0) {
      return new Float32Array(data.buffer, data.byteOffset, numElements);
    }
    const aligned = new Uint8Array(numElements * 4);
    aligned.set(data.subarray(0, Math.min(data.length, numElements * 4)));
    return new Float32Array(aligned.buffer);
  }
  if (Math.abs(data.length - expectedQ8) < expectedQ8 * 0.05) {
    return dequantQ8_0(data, numElements);
  }
  if (Math.abs(data.length - expectedQ4K) < expectedQ4K * 0.05) {
    return dequantQ4K(data, numElements);
  }
  // Fallback: try Q8_0 (best-effort; logs so the mismatch is visible).
  console.warn(`[Aether] Unknown quant for ${numElements} elems, ${data.length} bytes. Trying Q8_0.`);
  return dequantQ8_0(data, numElements);
}
|
| 124 |
+
|
| 125 |
+
// ─── GGUF Parser ────────────────────────────────────────────────────────────
|
| 126 |
+
const GGUF_MAGIC = 0x46554747;
|
| 127 |
+
const VT = { UINT8: 0, INT8: 1, UINT16: 2, INT16: 3, UINT32: 4, INT32: 5, FLOAT32: 6, BOOL: 7, STRING: 8, ARRAY: 9, UINT64: 10, INT64: 11, FLOAT64: 12 };
|
| 128 |
+
|
| 129 |
+
const GGML_BLOCK_SIZE = { 2:32,3:32,6:32,7:32,8:32,9:32,10:256,11:256,12:256,13:256,14:256,15:256 };
|
| 130 |
+
const GGML_BLOCK_BYTES = { 2:18,3:20,6:22,7:24,8:34,9:36,10:84,11:110,12:144,13:176,14:210,15:292 };
|
| 131 |
+
const GGML_TYPE_SIZE = { 0:4,1:2,16:1,17:2,18:4,19:8,20:8 };
|
| 132 |
+
|
| 133 |
+
/**
 * Compute the byte size of a GGUF tensor from its dims and ggml type id.
 * Blocked quant types round the element count up to whole blocks; plain
 * types use a per-element byte width (defaulting to 4, i.e. f32).
 * @param {bigint[]} dims - tensor dimensions (BigInt, as parsed from GGUF)
 * @param {number} type - ggml type id
 * @returns {number} size in bytes
 */
function calcTensorSize(dims, type) {
  // Accumulate the element count in BigInt so huge tensors stay exact
  // until the final Number cast.
  let count = 1n;
  for (const dim of dims) {
    count *= dim;
  }
  const elems = Number(count);
  const blockSize = GGML_BLOCK_SIZE[type];
  const blockBytes = GGML_BLOCK_BYTES[type];
  if (blockSize && blockBytes) {
    return Math.ceil(elems / blockSize) * blockBytes;
  }
  return Math.ceil(elems * (GGML_TYPE_SIZE[type] ?? 4));
}
|
| 140 |
+
|
| 141 |
+
/**
 * Read a GGUF string: u64 little-endian length prefix followed by UTF-8 bytes.
 * @param {Buffer} buf - file buffer
 * @param {number} off - offset of the length prefix
 * @returns {{v: string, o: number}} decoded string and offset just past it
 */
function readStr(buf, off) {
  const len = Number(buf.readBigUInt64LE(off));
  const start = off + 8;
  const end = start + len;
  const v = buf.subarray(start, end).toString('utf8');
  return { v, o: end };
}
|
| 145 |
+
|
| 146 |
+
/**
 * Read one typed GGUF metadata value at `off`.
 * @param {Buffer} buf - file buffer
 * @param {number} off - byte offset of the value
 * @param {number} t - GGUF value-type id (see VT)
 * @returns {{v: *, o: number}} decoded value and the offset just past it
 * @throws {Error} on an unknown value-type id
 */
function readVal(buf, off, t) {
  switch(t) {
    case VT.UINT8: return { v: buf.readUInt8(off), o: off+1 };
    case VT.INT8: return { v: buf.readInt8(off), o: off+1 };
    case VT.UINT16: return { v: buf.readUInt16LE(off), o: off+2 };
    case VT.INT16: return { v: buf.readInt16LE(off), o: off+2 };
    case VT.UINT32: return { v: buf.readUInt32LE(off), o: off+4 };
    case VT.INT32: return { v: buf.readInt32LE(off), o: off+4 };
    case VT.FLOAT32: return { v: buf.readFloatLE(off), o: off+4 };
    case VT.BOOL: return { v: buf.readUInt8(off) !== 0, o: off+1 };
    case VT.STRING: { const r = readStr(buf, off); return { v: r.v, o: r.o }; }
    // 64-bit ints are returned as BigInt so large values are not truncated;
    // callers that need a Number convert explicitly.
    case VT.UINT64: return { v: buf.readBigUInt64LE(off), o: off+8 };
    case VT.INT64: return { v: buf.readBigInt64LE(off), o: off+8 };
    case VT.FLOAT64: return { v: buf.readDoubleLE(off), o: off+8 };
    case VT.ARRAY: {
      // Array header: u32 element type + u64 length, then the packed
      // elements, decoded recursively (arrays of arrays are possible).
      const at = buf.readUInt32LE(off);
      const al = Number(buf.readBigUInt64LE(off+4));
      let co = off+12;
      const arr = [];
      for (let i = 0; i < al; i++) { const r = readVal(buf, co, at); arr.push(r.v); co = r.o; }
      return { v: arr, o: co };
    }
    default: throw new Error(`Unknown GGUF value type: ${t}`);
  }
}
|
| 171 |
+
|
| 172 |
+
/**
 * Parse a GGUF file header: magic, version, metadata KV pairs, and the
 * tensor directory. Tensor payloads are NOT read here — each descriptor's
 * `offset` is relative to the returned `dataOffset` and resolved later.
 * @param {Buffer} buf - entire GGUF file contents
 * @returns {{version: number, tensors: Array, dataOffset: number, metadata: Object}}
 * @throws {Error} if the magic number does not match
 */
function parseGGUF(buf) {
  let off = 0;
  if (buf.readUInt32LE(off) !== GGUF_MAGIC) throw new Error('Not GGUF');
  off += 4;
  const version = buf.readUInt32LE(off); off += 4;
  const tensorCount = Number(buf.readBigUInt64LE(off)); off += 8;
  const kvCount = Number(buf.readBigUInt64LE(off)); off += 8;
  // Default alignment per the GGUF spec; overridden by general.alignment below.
  let alignment = 32;
  const metadata = {};
  for (let i = 0; i < kvCount; i++) {
    const { v: key, o: o1 } = readStr(buf, off); off = o1;
    const vt = buf.readUInt32LE(off); off += 4;
    const { v, o: o2 } = readVal(buf, off, vt); off = o2;
    metadata[key] = v;
    if (key === 'general.alignment') alignment = Number(v);
  }
  // Tensor directory: name, dims (kept as BigInt), ggml type id, and byte
  // offset into the data section.
  const tensors = [];
  for (let i = 0; i < tensorCount; i++) {
    const { v: name, o: o1 } = readStr(buf, off); off = o1;
    const nDims = buf.readUInt32LE(off); off += 4;
    const dims = [];
    for (let d = 0; d < nDims; d++) { dims.push(buf.readBigUInt64LE(off)); off += 8; }
    const type = buf.readUInt32LE(off); off += 4;
    const offset = buf.readBigUInt64LE(off); off += 8;
    const numElements = Number(dims.reduce((a, b) => a * b, 1n));
    tensors.push({ name, nDims, dims, type, offset, size: calcTensorSize(dims, type), numElements });
  }
  // The data section begins at the next alignment boundary after the header.
  const dataOffset = Math.ceil(off / alignment) * alignment;
  return { version, tensors, dataOffset, metadata };
}
|
| 202 |
+
|
| 203 |
+
// ─── BPE Tokenizer ──────────────────────────────────────────────────────────
|
| 204 |
+
/**
 * Minimal byte-level BPE tokenizer driven by a HuggingFace tokenizer.json.
 *
 * NOTE(review): the pre-tokenizer here splits on whitespace runs (/\S+|\s+/),
 * which is simpler than the GPT-2-style regex most byte-level BPE models use;
 * token boundaries may differ slightly from the reference tokenizer — confirm
 * against the PyTorch side before trusting exact token counts.
 */
class BPETokenizer {
  /**
   * @param {Object} tokenizerJson - parsed tokenizer.json (model.vocab,
   *   model.merges, added_tokens)
   */
  constructor(tokenizerJson) {
    const model = tokenizerJson.model || {};
    this.vocab = model.vocab || {};
    // id -> token string, for decoding.
    this.reverseVocab = {};
    for (const [token, id] of Object.entries(this.vocab)) {
      this.reverseVocab[id] = token;
    }
    // Merges keep their file order as rank: lower rank = higher priority.
    this.merges = (model.merges || []).map((m, i) => {
      const [a, b] = m.split(' ');
      return { a, b, rank: i };
    });
    this.mergeRanks = {};
    for (const m of this.merges) {
      this.mergeRanks[`${m.a} ${m.b}`] = m.rank;
    }
    // Added tokens (special tokens)
    this.addedTokens = {};
    if (tokenizerJson.added_tokens) {
      for (const t of tokenizerJson.added_tokens) {
        this.addedTokens[t.content] = t.id;
      }
    }
    this.vocabSize = Object.keys(this.vocab).length + Object.keys(this.addedTokens).length;
  }

  /**
   * Encode text into token ids. Special tokens of the form <|...|> are
   * matched first and emitted verbatim; remaining text goes through
   * byte-level BPE with a <0xNN> byte fallback for unknown characters.
   * Symbols that still have no id after merging are silently dropped.
   * @param {string} text
   * @returns {number[]} token ids
   */
  encode(text) {
    // Handle special tokens first
    const specialPattern = /<\|[^|]+\|>/g;
    const parts = [];
    let lastIdx = 0;
    let match;
    while ((match = specialPattern.exec(text)) !== null) {
      if (match.index > lastIdx) parts.push({ text: text.slice(lastIdx, match.index), special: false });
      parts.push({ text: match[0], special: true });
      lastIdx = match.index + match[0].length;
    }
    if (lastIdx < text.length) parts.push({ text: text.slice(lastIdx), special: false });

    const tokens = [];
    for (const part of parts) {
      if (part.special) {
        const id = this.addedTokens[part.text] ?? this.vocab[part.text];
        if (id !== undefined) tokens.push(id);
        continue;
      }
      // Pre-tokenize: split into words (byte-level BPE style)
      const words = part.text.match(/\S+|\s+/g) || [];
      for (const word of words) {
        // Convert to byte-level tokens
        let symbols = [];
        for (let i = 0; i < word.length; i++) {
          const ch = word[i];
          const id = this.vocab[ch];
          if (id !== undefined) {
            symbols.push(ch);
          } else {
            // Byte fallback
            const bytes = Buffer.from(ch, 'utf8');
            for (const b of bytes) {
              const hex = `<0x${b.toString(16).toUpperCase().padStart(2, '0')}>`;
              symbols.push(hex);
            }
          }
        }
        // BPE merge loop: repeatedly apply the lowest-rank adjacent merge
        // until no listed merge applies. O(n^2) per word, fine for short words.
        while (symbols.length > 1) {
          let bestRank = Infinity;
          let bestIdx = -1;
          for (let i = 0; i < symbols.length - 1; i++) {
            const key = `${symbols[i]} ${symbols[i+1]}`;
            const rank = this.mergeRanks[key];
            if (rank !== undefined && rank < bestRank) {
              bestRank = rank;
              bestIdx = i;
            }
          }
          if (bestIdx === -1) break;
          const merged = symbols[bestIdx] + symbols[bestIdx + 1];
          symbols.splice(bestIdx, 2, merged);
        }
        // Map to IDs
        for (const sym of symbols) {
          const id = this.vocab[sym] ?? this.addedTokens[sym];
          if (id !== undefined) tokens.push(id);
        }
      }
    }
    return tokens;
  }

  /**
   * Decode token ids back into text. Special <|...|> tokens are skipped;
   * byte-level markers Ġ (space) and Ċ (newline) are restored.
   * NOTE(review): <0xNN> byte tokens are decoded one-by-one with
   * String.fromCharCode, which mangles multi-byte UTF-8 sequences — confirm
   * whether non-ASCII output matters for this demo.
   * @param {number[]} tokens
   * @returns {string}
   */
  decode(tokens) {
    const pieces = [];
    for (const t of tokens) {
      const piece = this.reverseVocab[t];
      if (piece !== undefined) {
        // Handle byte tokens like <0xFF>
        if (piece.startsWith('<0x') && piece.endsWith('>')) {
          const byte = parseInt(piece.slice(3, -1), 16);
          pieces.push(String.fromCharCode(byte));
        } else if (!piece.startsWith('<|')) {
          pieces.push(piece);
        }
      }
    }
    return pieces.join('').replace(/Ġ/g, ' ').replace(/Ċ/g, '\n');
  }
}
|
| 312 |
+
|
| 313 |
+
// ─── RoPE ───────────────────────────────────────────────────────────────────
|
| 314 |
+
/**
 * Apply rotary position embedding to one head's features, in place.
 * Feature i is paired with feature i + headDim/2 and the pair is rotated
 * by angle position * theta^(-2i/headDim) (the "split-half" RoPE layout).
 * @param {Float32Array} x - one head's slice (length headDim), mutated
 * @param {number} headDim - head dimension (even)
 * @param {number} position - absolute token position
 * @param {number} theta - RoPE base frequency
 */
function applyRoPE(x, headDim, position, theta) {
  const half = headDim / 2;
  for (let i = 0; i < half; i++) {
    const freq = 1.0 / Math.pow(theta, (2 * i) / headDim);
    const angle = position * freq;
    const cosA = Math.cos(angle);
    const sinA = Math.sin(angle);
    const a = x[i];
    const b = x[i + half];
    // Standard 2-D rotation of the (a, b) pair.
    x[i] = a * cosA - b * sinA;
    x[i + half] = a * sinA + b * cosA;
  }
}
|
| 327 |
+
|
| 328 |
+
// ─── Pure JS SIMD-style ops (fallback; WASM SIMD used when available) ───────
|
| 329 |
+
/**
 * Dense row-major matrix-vector product: out[r] = sum_c M[r][c] * v[c].
 * @param {Float32Array} matrix - rows*cols values, row-major
 * @param {Float32Array} vector - length cols
 * @param {number} rows
 * @param {number} cols
 * @returns {Float32Array} length-rows result
 */
function matVec(matrix, vector, rows, cols) {
  const result = new Float32Array(rows);
  for (let row = 0; row < rows; row++) {
    // Hoist the row base offset out of the inner dot-product loop.
    const base = row * cols;
    let acc = 0;
    for (let col = 0; col < cols; col++) {
      acc += matrix[base + col] * vector[col];
    }
    result[row] = acc;
  }
  return result;
}
|
| 339 |
+
|
| 340 |
+
/**
 * Root-mean-square layer normalization: x * (1/rms(x)) * weight, where
 * rms(x) = sqrt(mean(x^2) + eps).
 * @param {Float32Array} x - input vector (not mutated)
 * @param {Float32Array} weight - per-feature gain, same length as x
 * @param {number} eps - numerical-stability epsilon
 * @returns {Float32Array} normalized vector
 */
function rmsNorm(x, weight, eps) {
  let sumSq = 0;
  for (let i = 0; i < x.length; i++) {
    sumSq += x[i] * x[i];
  }
  const inv = 1.0 / Math.sqrt(sumSq / x.length + eps);
  const normed = new Float32Array(x.length);
  for (let i = 0; i < x.length; i++) {
    normed[i] = x[i] * inv * weight[i];
  }
  return normed;
}
|
| 348 |
+
|
| 349 |
+
/**
 * SiLU (swish) activation applied element-wise: x * sigmoid(x),
 * computed as x / (1 + exp(-x)).
 * @param {Float32Array} x - input (not mutated)
 * @returns {Float32Array} activated values
 */
function silu(x) {
  const n = x.length;
  const out = new Float32Array(n);
  for (let i = 0; i < n; i++) {
    const v = x[i];
    out[i] = v / (1 + Math.exp(-v));
  }
  return out;
}
|
| 354 |
+
|
| 355 |
+
/**
 * Numerically stable softmax: subtract the max before exponentiating so
 * large logits cannot overflow exp().
 * @param {Float32Array} x - logits (not mutated)
 * @returns {Float32Array} probabilities summing to 1
 */
function softmax(x) {
  const n = x.length;
  let peak = -Infinity;
  for (let i = 0; i < n; i++) {
    if (x[i] > peak) peak = x[i];
  }
  const out = new Float32Array(n);
  let total = 0;
  for (let i = 0; i < n; i++) {
    out[i] = Math.exp(x[i] - peak);
    total += out[i];
  }
  for (let i = 0; i < n; i++) {
    out[i] /= total;
  }
  return out;
}
|
| 364 |
+
|
| 365 |
+
// ─── Model ──────────────────────────────────────────────────────────────────
|
| 366 |
+
let model = null;
|
| 367 |
+
|
| 368 |
+
/**
 * Load and fully dequantize the GGUF model plus its tokenizer into the
 * module-level `model` global. Everything is held in RAM as Float32Arrays.
 * @param {string} ggufPath - path to the GGUF weight file
 * @param {string} tokenizerPath - path to tokenizer.json
 */
function loadModel(ggufPath, tokenizerPath) {
  console.log('[Aether] Loading GGUF...', ggufPath);
  const t0 = Date.now();
  const buf = readFileSync(ggufPath);
  const parsed = parseGGUF(buf);
  console.log(`[Aether] Parsed ${parsed.tensors.length} tensors in ${Date.now() - t0}ms`);

  // Load tokenizer
  console.log('[Aether] Loading tokenizer...');
  const tokJson = JSON.parse(readFileSync(tokenizerPath, 'utf8'));
  const tokenizer = new BPETokenizer(tokJson);

  // Extract tensors by name
  const tensorByName = {};
  for (const t of parsed.tensors) tensorByName[t.name] = t;

  // Helper to extract and dequantize a tensor.
  // Returns null (with a warning) when the tensor is absent so callers can
  // fall back (see tied embeddings below).
  function getTensor(name) {
    const t = tensorByName[name];
    if (!t) { console.warn(`[Aether] Missing tensor: ${name}`); return null; }
    // Tensor offsets in the directory are relative to the data section.
    const absOffset = parsed.dataOffset + Number(t.offset);
    const raw = new Uint8Array(buf.buffer, buf.byteOffset + absOffset, t.size);
    return dequantAuto(raw, t.numElements);
  }

  console.log('[Aether] Dequantizing embeddings...');
  const tokenEmbd = getTensor('token_embd.weight');

  // One record per transformer block, using ggml/llama.cpp tensor names.
  console.log('[Aether] Dequantizing layers...');
  const layers = [];
  for (let i = 0; i < CONFIG.numLayers; i++) {
    if (i % 8 === 0) console.log(`[Aether] Layer ${i}/${CONFIG.numLayers}...`);
    layers.push({
      attnNorm: getTensor(`blk.${i}.attn_norm.weight`),
      ffnNorm: getTensor(`blk.${i}.ffn_norm.weight`),
      qProj: getTensor(`blk.${i}.attn_q.weight`),
      kProj: getTensor(`blk.${i}.attn_k.weight`),
      vProj: getTensor(`blk.${i}.attn_v.weight`),
      oProj: getTensor(`blk.${i}.attn_output.weight`),
      gateProj: getTensor(`blk.${i}.ffn_gate.weight`),
      upProj: getTensor(`blk.${i}.ffn_up.weight`),
      downProj: getTensor(`blk.${i}.ffn_down.weight`),
    });
  }

  console.log('[Aether] Dequantizing output head...');
  const outputNorm = getTensor('output_norm.weight');
  let outputWeight = getTensor('output.weight');
  if (!outputWeight) {
    // Models with tied weights reuse the input embedding as the LM head.
    console.log('[Aether] No output.weight, using tied embeddings');
    outputWeight = tokenEmbd;
  }

  const loadTime = Date.now() - t0;
  console.log(`[Aether] Model loaded in ${loadTime}ms`);

  // Publish to the module-level global read by generate() and /health.
  model = { tokenEmbd, layers, outputNorm, outputWeight, tokenizer, loadTime };
}
|
| 426 |
+
|
| 427 |
+
// ─── Inference ──────────────────────────────────────────────────────────────
|
| 428 |
+
/**
 * Run the full LLaMA-style decode loop (prefill + generation) for one prompt.
 * Single-token steps only: each position is embedded, run through all layers
 * with a growing per-layer KV cache, then sampled once past the prefill.
 * @param {string} prompt - raw user prompt (wrapped in ChatML below)
 * @param {number} [maxTokens=100] - max tokens to generate
 * @returns {{text: string, tokens: number, totalTimeMs: number,
 *            avgTokenMs: number, prefillTokens: number, engine: string}}
 * @throws {Error} if loadModel() has not populated the global `model`
 */
function generate(prompt, maxTokens = 100) {
  if (!model) throw new Error('Model not loaded');

  const t0 = performance.now();
  const { hiddenDim, numHeads, numKvHeads, headDim, intermediateSize, ropeTheta, rmsNormEps } = CONFIG;
  const kvDim = numKvHeads * headDim;
  // Grouped-query attention: gqaRatio query heads share each KV head.
  const gqaRatio = numHeads / numKvHeads;

  // Format as chat (ChatML-style template used by SmolLM2-Instruct).
  const chatPrompt = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
  const inputTokens = model.tokenizer.encode(chatPrompt);
  const allTokens = [...inputTokens];

  // KV cache: [layer][position] -> { k, v }
  const kvCache = Array.from({ length: CONFIG.numLayers }, () => ({ keys: [], values: [] }));

  // Per-token wall-clock times for generated (non-prefill) tokens only.
  const tokenTimes = [];

  // Process all input tokens (prefill) then generate.
  // Steps 0..inputTokens.length-2 are pure prefill; sampling starts at the
  // last prompt token, yielding up to maxTokens sampled tokens total.
  for (let step = 0; step < inputTokens.length + maxTokens - 1; step++) {
    const tokenStart = performance.now();
    const pos = step;
    const tokenId = allTokens[step];

    // Embed: copy the token's embedding row into a fresh hidden state.
    const hidden = new Float32Array(hiddenDim);
    const embOffset = tokenId * hiddenDim;
    for (let i = 0; i < hiddenDim; i++) hidden[i] = model.tokenEmbd[embOffset + i];

    let x = hidden;

    // Run through layers
    for (let l = 0; l < CONFIG.numLayers; l++) {
      const layer = model.layers[l];

      // 1. Attention norm
      const normed = rmsNorm(x, layer.attnNorm, rmsNormEps);

      // 2. Q, K, V projections (K/V are smaller under GQA: kvDim rows)
      const q = matVec(layer.qProj, normed, hiddenDim, hiddenDim);
      const k = matVec(layer.kProj, normed, kvDim, hiddenDim);
      const v = matVec(layer.vProj, normed, kvDim, hiddenDim);

      // 3. RoPE — rotate each head slice in place at this position.
      for (let h = 0; h < numHeads; h++) {
        applyRoPE(q.subarray(h * headDim, (h + 1) * headDim), headDim, pos, ropeTheta);
      }
      for (let h = 0; h < numKvHeads; h++) {
        applyRoPE(k.subarray(h * headDim, (h + 1) * headDim), headDim, pos, ropeTheta);
      }

      // 4. Store in KV cache (copied, since q/k/v are reused next step)
      kvCache[l].keys.push(new Float32Array(k));
      kvCache[l].values.push(new Float32Array(v));

      // 5. Attention with full KV cache
      const attnOut = new Float32Array(hiddenDim);
      const seqLen = kvCache[l].keys.length;

      for (let h = 0; h < numHeads; h++) {
        // Map the query head to its shared KV head (GQA).
        const kvHead = Math.floor(h / gqaRatio);
        const qHead = q.subarray(h * headDim, (h + 1) * headDim);

        // Compute attention scores (scaled dot product against all cached keys)
        const scores = new Float32Array(seqLen);
        for (let s = 0; s < seqLen; s++) {
          const kHead = kvCache[l].keys[s].subarray(kvHead * headDim, (kvHead + 1) * headDim);
          let dot = 0;
          for (let d = 0; d < headDim; d++) dot += qHead[d] * kHead[d];
          scores[s] = dot / Math.sqrt(headDim);
        }

        // Causal mask: already handled (only see past positions)
        // Softmax
        const attnWeights = softmax(scores);

        // Weighted sum of values
        for (let s = 0; s < seqLen; s++) {
          const vHead = kvCache[l].values[s].subarray(kvHead * headDim, (kvHead + 1) * headDim);
          const w = attnWeights[s];
          for (let d = 0; d < headDim; d++) {
            attnOut[h * headDim + d] += w * vHead[d];
          }
        }
      }

      // 6. Output projection
      const projected = matVec(layer.oProj, attnOut, hiddenDim, hiddenDim);

      // 7. Residual
      const postAttn = new Float32Array(hiddenDim);
      for (let i = 0; i < hiddenDim; i++) postAttn[i] = x[i] + projected[i];

      // 8. FFN norm
      const ffnInput = rmsNorm(postAttn, layer.ffnNorm, rmsNormEps);

      // 9. SwiGLU MLP: silu(gate) * up, then project back down.
      const gate = matVec(layer.gateProj, ffnInput, intermediateSize, hiddenDim);
      const up = matVec(layer.upProj, ffnInput, intermediateSize, hiddenDim);
      const activated = silu(gate);
      for (let i = 0; i < intermediateSize; i++) activated[i] *= up[i];
      const down = matVec(layer.downProj, activated, hiddenDim, intermediateSize);

      // 10. Residual
      x = new Float32Array(hiddenDim);
      for (let i = 0; i < hiddenDim; i++) x[i] = postAttn[i] + down[i];
    }

    // Only sample if past prefill
    if (step >= inputTokens.length - 1) {
      // Final norm + LM head
      const finalNormed = rmsNorm(x, model.outputNorm, rmsNormEps);
      const logits = matVec(model.outputWeight, finalNormed, CONFIG.vocabSize, hiddenDim);

      // Temperature sampling
      const temperature = 0.7;
      for (let i = 0; i < logits.length; i++) logits[i] /= temperature;
      const probs = softmax(logits);

      // Top-p sampling.
      // NOTE(review): this is a non-standard top-p — it samples from the
      // unrenormalized full distribution and falls back to argmax once
      // cumulative mass exceeds 0.9; a canonical nucleus sampler would
      // truncate first and renormalize. Confirm this is intentional.
      const indexed = Array.from(probs).map((p, i) => ({ p, i })).sort((a, b) => b.p - a.p);
      let cumP = 0;
      let chosen = indexed[0].i;
      const r = Math.random();
      for (const { p, i } of indexed) {
        cumP += p;
        if (r < cumP) { chosen = i; break; }
        if (cumP > 0.9) break;
      }

      const tokenEnd = performance.now();
      // The first sampled token (still part of prefill work) is excluded
      // from the per-token timing average.
      if (step >= inputTokens.length) tokenTimes.push(tokenEnd - tokenStart);

      if (chosen === CONFIG.eosToken) break;
      allTokens.push(chosen);
    }
  }

  const totalTime = performance.now() - t0;
  const generatedTokens = allTokens.slice(inputTokens.length);
  const text = model.tokenizer.decode(generatedTokens);
  const avgTokenTime = tokenTimes.length > 0 ? tokenTimes.reduce((a, b) => a + b, 0) / tokenTimes.length : 0;

  return {
    text,
    tokens: generatedTokens.length,
    totalTimeMs: Math.round(totalTime),
    avgTokenMs: Math.round(avgTokenTime),
    prefillTokens: inputTokens.length,
    engine: 'Aether WASM-SIMD',
  };
}
|
| 580 |
+
|
| 581 |
+
// ─── HTTP Server ────────────────────────────────────────────────────────────
|
| 582 |
+
/**
 * Start the HTTP API:
 *   POST /generate  {prompt, max_tokens?} -> JSON result from generate()
 *   GET  /health    -> load status and load time
 * Binds to 127.0.0.1 only — presumably proxied by the Python frontend on the
 * same container; confirm nothing external needs direct access.
 */
function startServer() {
  const server = createServer((req, res) => {
    if (req.method === 'POST' && req.url === '/generate') {
      // Buffer the whole request body before parsing.
      let body = '';
      req.on('data', c => body += c);
      req.on('end', () => {
        try {
          const { prompt, max_tokens } = JSON.parse(body);
          const result = generate(prompt, max_tokens || 100);
          res.writeHead(200, { 'Content-Type': 'application/json' });
          res.end(JSON.stringify(result));
        } catch (e) {
          // Surface the stack to the (trusted, same-container) caller to
          // ease debugging; don't do this on a public endpoint.
          res.writeHead(500, { 'Content-Type': 'application/json' });
          res.end(JSON.stringify({ error: e.message, stack: e.stack }));
        }
      });
    } else if (req.url === '/health') {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ status: 'ok', model: model ? 'loaded' : 'not loaded', loadTime: model?.loadTime }));
    } else {
      res.writeHead(404);
      res.end('Not found');
    }
  });

  server.listen(PORT, '127.0.0.1', () => {
    console.log(`[Aether] Server listening on http://127.0.0.1:${PORT}`);
  });
}
|
| 611 |
+
|
| 612 |
+
// ─── Main ───────────────────────────────────────────────────────────────────
// Bootstrap: resolve artifact paths (overridable via env for local testing),
// fetch missing artifacts, load the model, then start serving.
const ggufPath = process.env.GGUF_PATH || join('/tmp/hf_cache', 'buleyean-smollm2-360m-q8_0.gguf');
const tokenizerPath = process.env.TOKENIZER_PATH || join('/tmp/hf_cache', 'tokenizer.json');

// Download if needed — shells out to the Python huggingface_hub client
// (already installed for app.py) rather than re-implementing HF download
// logic in JS. `local_dir` places the file directly at the expected path.
if (!existsSync(ggufPath)) {
  console.log('[Aether] Downloading GGUF model...');
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('forkjoin-ai/buleyean-smollm2-360m', 'buleyean-smollm2-360m-q8_0.gguf', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
}
if (!existsSync(tokenizerPath)) {
  console.log('[Aether] Downloading tokenizer...');
  // Tokenizer is taken from the upstream base-model repo, not the fine-tune.
  execSync(`python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('HuggingFaceTB/SmolLM2-360M-Instruct', 'tokenizer.json', cache_dir='/tmp/hf_cache', local_dir='/tmp/hf_cache')"`, { stdio: 'inherit' });
}

loadModel(ggufPath, tokenizerPath);
startServer();
|
app.py
CHANGED
|
@@ -1,63 +1,122 @@
|
|
| 1 |
"""
|
| 2 |
The Void -- Buleyean RL
|
| 3 |
-
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
#
|
|
|
|
| 13 |
base_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
|
| 14 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 15 |
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
| 16 |
torch_dtype=torch.float32,
|
| 17 |
device_map="cpu",
|
| 18 |
)
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
messages = [{"role": "user", "content": prompt}]
|
| 35 |
-
text =
|
| 36 |
-
inputs =
|
|
|
|
|
|
|
| 37 |
with torch.no_grad():
|
| 38 |
-
outputs =
|
| 39 |
**inputs,
|
| 40 |
-
max_new_tokens=
|
| 41 |
temperature=0.7,
|
| 42 |
top_p=0.9,
|
| 43 |
do_sample=True,
|
| 44 |
-
pad_token_id=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
)
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def compare(prompt):
|
| 51 |
if not prompt or not prompt.strip():
|
| 52 |
-
return "", ""
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
CSS = """
|
| 59 |
/* AeonOS Design System */
|
| 60 |
-
.gradio-container { max-width:
|
| 61 |
.gradio-container, .dark { background: #09090b !important; }
|
| 62 |
|
| 63 |
/* Hero */
|
|
@@ -74,10 +133,14 @@ CSS = """
|
|
| 74 |
.base-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
|
| 75 |
.void-label { color: #3b82f6 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
/* Input */
|
|
|
|
| 78 |
#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
|
| 79 |
#prompt-input textarea:focus { border-color: #3b82f6 !important; box-shadow: 0 0 0 2px rgba(59,130,246,0.1) !important; }
|
| 80 |
-
#prompt-input > label > span { display: none !important; }
|
| 81 |
|
| 82 |
/* Generate button */
|
| 83 |
#gen-btn { background: #3b82f6 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; transition: all 150ms !important; }
|
|
@@ -92,9 +155,6 @@ CSS = """
|
|
| 92 |
#footer p { color: #52525b; font-size: 0.8rem; }
|
| 93 |
#footer a { color: #3b82f6; text-decoration: none; }
|
| 94 |
|
| 95 |
-
/* Divider */
|
| 96 |
-
.vs-divider { color: #27272a !important; font-size: 0.75rem !important; text-transform: uppercase !important; letter-spacing: 0.1em !important; }
|
| 97 |
-
|
| 98 |
/* Hide Gradio chrome */
|
| 99 |
footer.svelte-1ax1toq { display: none !important; }
|
| 100 |
.built-with { display: none !important; }
|
|
@@ -102,54 +162,61 @@ footer.svelte-1ax1toq { display: none !important; }
|
|
| 102 |
|
| 103 |
with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="blue", neutral_hue="zinc"), title="The Void") as demo:
|
| 104 |
|
| 105 |
-
# Hero
|
| 106 |
gr.HTML("""
|
| 107 |
<div id="hero">
|
| 108 |
<h1>The <span class="accent">Void</span></h1>
|
| 109 |
-
<p class="subtitle">
|
| 110 |
-
|
| 111 |
</div>
|
| 112 |
""")
|
| 113 |
|
| 114 |
-
# Input
|
| 115 |
prompt = gr.Textbox(elem_id="prompt-input", placeholder="What would you like to ask?", lines=2, label="Prompt", show_label=False, interactive=True)
|
| 116 |
btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
|
| 117 |
|
| 118 |
-
# Outputs
|
| 119 |
with gr.Row(equal_height=True):
|
| 120 |
with gr.Column():
|
| 121 |
-
gr.HTML('<p class="base-label">
|
| 122 |
-
base_out = gr.Textbox(lines=
|
| 123 |
-
|
|
|
|
| 124 |
gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
|
| 125 |
with gr.Column():
|
| 126 |
-
gr.HTML('<p class="void-label">
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
-
btn.click(
|
| 130 |
-
prompt.submit(
|
| 131 |
|
| 132 |
-
# Prompt suggestions
|
| 133 |
gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
|
| 134 |
with gr.Row():
|
| 135 |
for p in ["hello", "How are you feeling?", "I've been anxious lately.", "Write a haiku about failure.", "What is the meaning of life?"]:
|
| 136 |
gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
|
| 137 |
-
fn=lambda x=p:
|
| 138 |
).then(fn=lambda x=p: x, outputs=[prompt])
|
| 139 |
|
| 140 |
-
# Footer
|
| 141 |
gr.HTML("""
|
| 142 |
<div id="footer">
|
| 143 |
-
<p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
|
|
|
|
|
|
|
|
|
|
| 144 |
<p>
|
| 145 |
-
<a href="https://forkracefold.com/">Whitepaper</a> &
|
| 146 |
-
<a href="https://github.com/forkjoin-ai/buleyean-rl">Library</a> &
|
| 147 |
-
<a href="https://huggingface.co/forkjoin-ai">Models</a> &
|
| 148 |
-
<a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a>
|
| 149 |
-
<a href="https://huggingface.co/spaces/forkjoin-ai/void-attention">Void Attention</a> ·
|
| 150 |
-
<a href="https://huggingface.co/spaces/forkjoin-ai/metacog">METACOG</a>
|
| 151 |
</p>
|
| 152 |
-
<p style="margin-top:1rem;">500+ Lean 4 theorems &
|
| 153 |
</div>
|
| 154 |
""")
|
| 155 |
|
|
|
|
| 1 |
"""
|
| 2 |
The Void -- Buleyean RL
|
| 3 |
+
PyTorch vs Aether. Side by side. Let the speed speak.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import json
import os
import subprocess
import time
import urllib.request

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 13 |
|
| 14 |
+
# ─── Start Aether sidecar ────────────────────────────────────────────────────
# Launch the Node.js Aether inference server as a child process on port 7861.
# stdout/stderr are piped so its startup logs can be echoed by this process.
print("[Void] Starting Aether inference server...", flush=True)
aether_proc = subprocess.Popen(
    ["node", "aether-server.mjs"],
    # Inherit our environment plus the sidecar's port (was an
    # `__import__('os')` hack; use the normal `os` import instead).
    env={**os.environ, "AETHER_PORT": "7861"},
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
)
|
| 22 |
|
| 23 |
+
# ─── Load PyTorch model ──────────────────────────────────────────────────────
|
| 24 |
+
print("[Void] Loading PyTorch base model...", flush=True)
|
| 25 |
base_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
|
| 26 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 27 |
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
| 28 |
torch_dtype=torch.float32,
|
| 29 |
device_map="cpu",
|
| 30 |
)
|
| 31 |
+
print("[Void] PyTorch model ready.", flush=True)
|
| 32 |
+
|
| 33 |
+
# Wait for Aether to be ready, echoing its startup logs while we poll.
print("[Void] Waiting for Aether...", flush=True)

import threading


def _drain_aether_output(proc):
    """Continuously echo the sidecar's output so its pipe never fills.

    Fixes a latent deadlock: the original code only read stdout while
    polling for readiness, so once the loop exited nothing drained the
    pipe — the OS pipe buffer could fill and block the Node process
    mid-generation. A daemon thread keeps reading for the app's lifetime.
    """
    for raw in iter(proc.stdout.readline, b""):
        print(f"  [Aether] {raw.decode(errors='replace').rstrip()}", flush=True)


if aether_proc.stdout is not None:
    threading.Thread(target=_drain_aether_output, args=(aether_proc,), daemon=True).start()

for attempt in range(120):
    try:
        req = urllib.request.Request("http://127.0.0.1:7861/health")
        resp = urllib.request.urlopen(req, timeout=2)
        health = json.loads(resp.read())
        if health.get("status") == "ok" and health.get("model") == "loaded":
            print(f"[Void] Aether ready (model loaded in {health.get('loadTime')}ms)", flush=True)
            break
    except Exception:
        pass  # server not up yet; keep polling
    time.sleep(1)
else:
    # for/else: loop exhausted without break — Aether never came up.
    print("[Void] WARNING: Aether not ready after 120s, continuing anyway", flush=True)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def gen_pytorch(prompt):
    """Generate with the PyTorch baseline and time the whole call.

    Returns a 4-tuple: (response_text, elapsed_seconds,
    generated_token_count, milliseconds_per_token).
    """
    chat = base_tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
    )
    encoded = base_tokenizer(chat, return_tensors="pt")

    start = time.perf_counter()
    with torch.no_grad():
        generated = base_model.generate(
            **encoded,
            max_new_tokens=100,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=base_tokenizer.eos_token_id,
        )
    elapsed = time.perf_counter() - start

    # Strip the prompt tokens; only the continuation counts.
    prompt_len = encoded["input_ids"].shape[1]
    n_tokens = generated.shape[1] - prompt_len
    response = base_tokenizer.decode(
        generated[0][prompt_len:], skip_special_tokens=True
    ).strip()
    ms_per_tok = (elapsed * 1000 / n_tokens) if n_tokens > 0 else 0
    return response, elapsed, n_tokens, ms_per_tok
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def gen_aether(prompt):
    """Generate with the Aether sidecar (our engine).

    POSTs the prompt to the local Node server and returns the engine's
    own measurements as a 4-tuple: (response_text, total_seconds,
    generated_token_count, milliseconds_per_token).

    On any failure (server down, timeout, malformed response) this
    returns an error string with zeroed stats instead of raising, so
    the UI always has something to render.

    Fix vs. the previous revision: the unused `t0`/`wall_time` locals
    (computed but never returned) are removed — timing comes from the
    engine's own report, which excludes HTTP overhead.
    """
    try:
        data = json.dumps({"prompt": prompt, "max_tokens": 100}).encode()
        req = urllib.request.Request(
            "http://127.0.0.1:7861/generate",
            data=data,
            headers={"Content-Type": "application/json"},
        )
        # Generous timeout: pure-JS CPU inference can be slow.
        resp = urllib.request.urlopen(req, timeout=300)
        result = json.loads(resp.read())
        return (
            result["text"],
            result["totalTimeMs"] / 1000,
            result["tokens"],
            result["avgTokenMs"],
        )
    except Exception as e:
        return f"[Aether error: {e}]", 0, 0, 0
|
| 101 |
|
| 102 |
|
| 103 |
def compare(prompt):
    """Run both engines on the same prompt.

    Returns (pytorch_text, aether_text, pytorch_stats, aether_stats).
    Blank or whitespace-only prompts short-circuit to four empty strings.
    """
    if not prompt or not prompt.strip():
        return "", "", "", ""

    def fmt(toks, secs, ms):
        # Shared one-line stats format used by both columns.
        return f"{toks} tokens in {secs:.1f}s ({ms:.0f}ms/tok)"

    py_text, py_secs, py_toks, py_ms = gen_pytorch(prompt)
    ae_text, ae_secs, ae_toks, ae_ms = gen_aether(prompt)
    return py_text, ae_text, fmt(py_toks, py_secs, py_ms), fmt(ae_toks, ae_secs, ae_ms)
|
| 115 |
|
| 116 |
|
| 117 |
CSS = """
|
| 118 |
/* AeonOS Design System */
|
| 119 |
+
.gradio-container { max-width: 1060px !important; margin: 0 auto !important; }
|
| 120 |
.gradio-container, .dark { background: #09090b !important; }
|
| 121 |
|
| 122 |
/* Hero */
|
|
|
|
| 133 |
.base-label { color: #71717a !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
|
| 134 |
.void-label { color: #3b82f6 !important; font-size: 0.8rem !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; font-weight: 500 !important; }
|
| 135 |
|
| 136 |
+
/* Stats */
|
| 137 |
+
.stats-text { font-family: 'SF Mono', 'Fira Code', monospace !important; font-size: 0.8rem !important; color: #52525b !important; }
|
| 138 |
+
.stats-text.faster { color: #22c55e !important; }
|
| 139 |
+
|
| 140 |
/* Input */
|
| 141 |
+
#prompt-input > label > span { display: none !important; }
|
| 142 |
#prompt-input textarea { background: #111114 !important; border: 1px solid #1f1f23 !important; border-radius: 8px !important; color: #fafafa !important; font-size: 1rem !important; padding: 1rem !important; }
|
| 143 |
#prompt-input textarea:focus { border-color: #3b82f6 !important; box-shadow: 0 0 0 2px rgba(59,130,246,0.1) !important; }
|
|
|
|
| 144 |
|
| 145 |
/* Generate button */
|
| 146 |
#gen-btn { background: #3b82f6 !important; border: none !important; border-radius: 8px !important; font-weight: 500 !important; font-size: 0.9rem !important; padding: 0.75rem 2rem !important; transition: all 150ms !important; }
|
|
|
|
| 155 |
#footer p { color: #52525b; font-size: 0.8rem; }
|
| 156 |
#footer a { color: #3b82f6; text-decoration: none; }
|
| 157 |
|
|
|
|
|
|
|
|
|
|
| 158 |
/* Hide Gradio chrome */
|
| 159 |
footer.svelte-1ax1toq { display: none !important; }
|
| 160 |
.built-with { display: none !important; }
|
|
|
|
| 162 |
|
| 163 |
with gr.Blocks(css=CSS, theme=gr.themes.Base(primary_hue="blue", neutral_hue="zinc"), title="The Void") as demo:
|
| 164 |
|
|
|
|
| 165 |
gr.HTML("""
|
| 166 |
<div id="hero">
|
| 167 |
<h1>The <span class="accent">Void</span></h1>
|
| 168 |
+
<p class="subtitle">PyTorch vs Aether. Same model. Different engines. Live inference.<br/>
|
| 169 |
+
Left: standard PyTorch CPU. Right: Aether WASM-SIMD kernels. Both generate in real-time.</p>
|
| 170 |
</div>
|
| 171 |
""")
|
| 172 |
|
|
|
|
| 173 |
prompt = gr.Textbox(elem_id="prompt-input", placeholder="What would you like to ask?", lines=2, label="Prompt", show_label=False, interactive=True)
|
| 174 |
btn = gr.Button("Generate", elem_id="gen-btn", variant="primary")
|
| 175 |
|
|
|
|
| 176 |
with gr.Row(equal_height=True):
|
| 177 |
with gr.Column():
|
| 178 |
+
gr.HTML('<p class="base-label">PyTorch (standard)</p>')
|
| 179 |
+
base_out = gr.Textbox(lines=8, show_label=False, interactive=False, elem_classes=["response-card"])
|
| 180 |
+
base_stats = gr.HTML('<p class="stats-text">--</p>')
|
| 181 |
+
with gr.Column(min_width=30):
|
| 182 |
gr.HTML('<p style="color:#27272a; text-align:center; padding-top:4rem; font-size:0.75rem; letter-spacing:0.1em;">VS</p>')
|
| 183 |
with gr.Column():
|
| 184 |
+
gr.HTML('<p class="void-label">Aether (our engine)</p>')
|
| 185 |
+
aether_out = gr.Textbox(lines=8, show_label=False, interactive=False, elem_classes=["response-card"])
|
| 186 |
+
aether_stats = gr.HTML('<p class="stats-text">--</p>')
|
| 187 |
+
|
| 188 |
+
def run_compare(prompt_text):
|
| 189 |
+
base_text, aether_text, b_stats, a_stats = compare(prompt_text)
|
| 190 |
+
return (
|
| 191 |
+
base_text,
|
| 192 |
+
aether_text,
|
| 193 |
+
f'<p class="stats-text">{b_stats}</p>',
|
| 194 |
+
f'<p class="stats-text">{a_stats}</p>',
|
| 195 |
+
)
|
| 196 |
|
| 197 |
+
btn.click(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
|
| 198 |
+
prompt.submit(run_compare, [prompt], [base_out, aether_out, base_stats, aether_stats])
|
| 199 |
|
|
|
|
| 200 |
gr.HTML('<p style="color:#52525b; font-size:0.8rem; margin-top:1.5rem; margin-bottom:0.5rem;">Try these:</p>')
|
| 201 |
with gr.Row():
|
| 202 |
for p in ["hello", "How are you feeling?", "I've been anxious lately.", "Write a haiku about failure.", "What is the meaning of life?"]:
|
| 203 |
gr.Button(p, size="sm", elem_classes=["prompt-chip"]).click(
|
| 204 |
+
fn=lambda x=p: run_compare(x), outputs=[base_out, aether_out, base_stats, aether_stats]
|
| 205 |
).then(fn=lambda x=p: x, outputs=[prompt])
|
| 206 |
|
|
|
|
| 207 |
gr.HTML("""
|
| 208 |
<div id="footer">
|
| 209 |
+
<p style="color:#a1a1aa; font-size:0.85rem; margin-bottom:0.5rem;">
|
| 210 |
+
SmolLM2-360M-Instruct · Buleyean RL ·
|
| 211 |
+
Left: PyTorch CPU · Right: Aether WASM-SIMD (zero ML dependencies)
|
| 212 |
+
</p>
|
| 213 |
<p>
|
| 214 |
+
<a href="https://forkracefold.com/">Whitepaper</a> ·
|
| 215 |
+
<a href="https://github.com/forkjoin-ai/buleyean-rl">Library</a> ·
|
| 216 |
+
<a href="https://huggingface.co/forkjoin-ai">Models</a> ·
|
| 217 |
+
<a href="https://huggingface.co/spaces/forkjoin-ai/glossolalia">Glossolalia</a>
|
|
|
|
|
|
|
| 218 |
</p>
|
| 219 |
+
<p style="margin-top:1rem;">500+ Lean 4 theorems · Zero sorry · <a href="https://forkracefold.com/">φ² = φ + 1</a></p>
|
| 220 |
</div>
|
| 221 |
""")
|
| 222 |
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 2 |
torch>=2.1.0
|
| 3 |
transformers>=4.46.0
|
| 4 |
huggingface-hub>=0.26.0
|
| 5 |
sentencepiece>=0.2.0
|
| 6 |
accelerate>=1.0.0
|
| 7 |
gguf>=0.10.0
|
|
|
|
|
|
|
|
|
| 1 |
torch>=2.1.0
|
| 2 |
transformers>=4.46.0
|
| 3 |
huggingface-hub>=0.26.0
|
| 4 |
sentencepiece>=0.2.0
|
| 5 |
accelerate>=1.0.0
|
| 6 |
gguf>=0.10.0
|
| 7 |
+
gradio>=5.0.0
|