# pplx-embed-v1-0.6b-GGUF

import numpy as np
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim


# Reference embedding model, loaded through sentence-transformers.
# NOTE(review): trust_remote_code=True executes code shipped in the hub repo —
# confirm the "perplexity-ai" source is trusted before running.
model = SentenceTransformer(
    "perplexity-ai/pplx-embed-v1-0.6b",
    trust_remote_code=True
)
# GGUF copy of the same model, loaded via llama.cpp with embedding output enabled.
llama = Llama.from_pretrained(
    repo_id="mykor/pplx-embed-v1-0.6b-GGUF",
    filename="pplx-embed-v1-0.6B-F32.gguf",
    verbose=False,
    embedding=True,
    n_ctx=0,  # 0 = take the context length from the GGUF metadata
)

text = """์ด์   ๊นจ์–ด๋‚˜ ์„ค๋ ˆ๋Š” ์ด๋ง˜ ๋”ฐ๋ผ
๊ธฐ๋‹ค๋ฆฌ๋˜ ์˜ค๋Š˜์„ ๋งŒ๋“ค ๊ฑฐ์•ผ

์ด ๋งŽ์€ ์‹œ๊ฐ„์„ ๋‚œ ํ˜๋ฆฌ๊ณ  ์‹ถ์ง€๋Š” ์•Š์•„
์ด์ œ ๋‚˜๋Š” ๋ถ„๋ช…ํžˆ ์ค€๋น„๊ฐ€ ๋ผ์žˆ๋Š”๋ฐ
์–ด๋–ค ๋ฌด๊ธฐ๋ ฅํ•จ์ด ์™€๋„
๋‚˜์—๊ฒ ์˜๋ฏธ๊ฐ€ ์—†์„ ๋งŒํผ
๋˜๋‹ค์‹œ wake up ๋” wake up
๋‹ฌ๊ถˆ์ ธ ๋” ๋œจ๊ฒ๊ฒŒ

๊ตณ์ด ๋ด์ค„ ํ•„์š” ์—†์–ด ์–ด์ฐจํ”ผ never give up
๋ญ๊ฐ€ ๋ค๋ฒผ์˜ค๋“  ๋‚˜๋Š” ์–ด์ฉŒ๋ผ ํ•˜๋“ฏ์ด
๋ญ๋“  ํ•  ์ˆ˜ ์žˆ์„ ๊ฑฐ ๊ฐ™์•„

์ด์   ๊นจ์–ด๋‚˜ ์„ค๋ ˆ๋Š” ์ด ๋ง˜ ๋”ฐ๋ผ
๊ธฐ๋‹ค๋ฆฌ๋˜ ์˜ค๋Š˜์„ ๋งŒ๋“ค ๊ฑฐ์•ผ
๋„์‹œ ์‚ฌ์ด์— ํ”ผ์–ด๋‚˜ ์ด๊ฒจ๋‚ด์˜จ ๊ฝƒ์ฒ˜๋Ÿผ
๋ถ„๋ช… ํ”ผ์›Œ๋‚ผ ๊ฑฐ์•ผ ๋‚˜๋ฅผ

๋‚˜์—๊ฒŒ ๋‚จ์•„ ์žˆ๋Š” ๊ฒŒ ์–ผ๋งˆ ๋˜์ง€ ์•Š๋Š” ๊ฒƒ ๊ฐ™์•„๋„
๋‘ ๋ฒˆ ๋‹ค์‹  ๋‚ด๊ฒŒ ๋ถ€๋„๋Ÿฝ์ง€ ์•Š๊ฒŒ
๋˜‘๋ฐ”๋กœ ๋ด

๊ตณ์ด ๋ด์ค„ ํ•„์š” ์—†์–ด ์–ด์ฐจํ”ผ never give up
๋ญ๊ฐ€ ๋ค๋ฒผ์˜ค๋“  ๋‚˜๋Š” ์–ด์ฉŒ๋ผ ํ•˜๋“ฏ์ด
๋ญ๋“  ์ด๊ธธ ์ˆ˜ ์žˆ์„๊ฑฐ์•ผ

์ด์   ๊นจ์–ด๋‚˜ ์„ค๋ ˆ๋Š” ์ด๋ง˜ ๋”ฐ๋ผ
๊ธฐ๋‹ค๋ฆฌ๋˜ ์˜ค๋Š˜์„ ๋งŒ๋“ค ๊ฑฐ์•ผ
๋„์‹œ ์‚ฌ์ด์— ํ”ผ์–ด๋‚˜ ์ด๊ฒจ๋‚ด์˜จ ๊ฝƒ์ฒ˜๋Ÿผ
๋ถ„๋ช… ํ”ผ์›Œ๋‚ผ ๊ฑฐ์•ผ ๋‚˜๋ฅผ

Time's up ๋ช‡ ๋ฒˆ์„ํ•ด๋„ overcharge
๊ทธ๋งŒํ•ด๋„ ๋œ๋‹ค ํ•ด๋„
No matter, no matter ์ˆจ์ด ์ฐจ์˜ฌ๋ผ๋„
์ด ๊ธธ์„ ๋ฐ”๋ž€๋‹ค๋ฉด

ํ•˜๋Š˜ ์ € ๋„ˆ๋จธ ๋ป—์–ด๊ฐ€๋Š” ๋น› ๋”ฐ๋ผ
์ด ๋งˆ์Œ๋„ ๋‚ ๋ ค ๊ณง ๋‹ฟ์„ ๊ฒƒ๋งŒ ๊ฐ™์•„
๋„์‹œ ์‚ฌ์ด์— ํ”ผ์–ด๋‚˜ ์ด๊ฒจ๋‚ด์˜จ ๊ฝƒ์ฒ˜๋Ÿผ
๋ถ„๋ช… ํ”ผ์›Œ๋‚ผ ๊ฑฐ์•ผ ๋‚˜๋ฅผ"""


def int8_quantize(arr: np.ndarray) -> np.ndarray:
    """Squash *arr* into the signed-int8 value range via tanh.

    Each element is mapped to (-1, 1) with tanh, scaled to [-127, 127],
    rounded to the nearest integer, and clipped to [-128, 127].  The
    result is intentionally kept as float32 rather than int8 so it can
    feed straight into cosine-similarity computations.
    """
    lo, hi = -128, 127
    scaled = np.tanh(arr) * hi
    quantized = np.clip(np.round(scaled), lo, hi)
    return quantized.astype(np.float32)


# Encode the same text with both back-ends.
embed1 = model.encode(text)
embed2 = llama.embed(text)
# NOTE(review): the llama.cpp output is passed through the tanh/int8 step
# before comparison — presumably the reference model applies this
# post-processing internally while the raw GGUF path does not; confirm.
embed2 = int8_quantize(embed2)
# A cosine similarity near 1.0 indicates the GGUF conversion is faithful.
print(cos_sim(embed1, embed2).item())
# expected output: 0.9999861717224121
Downloads last month
470
GGUF
Model size
0.6B params
Architecture
qwen3
Hardware compatibility
Log In to add your hardware

3-bit

4-bit

5-bit

6-bit

8-bit

16-bit

32-bit

Inference Providers NEW
This model isn't deployed by any Inference Provider. ๐Ÿ™‹ Ask for provider support

Model tree for mykor/pplx-embed-v1-0.6b-GGUF

Quantized
(4)
this model