Sentence Similarity
sentence-transformers
Safetensors
bert
feature-extraction
text-embeddings-inference
Instructions to use yahyaabd/sbert-bps-custom-tokenizer-en with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use yahyaabd/sbert-bps-custom-tokenizer-en with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("yahyaabd/sbert-bps-custom-tokenizer-en")

sentences = [
    "That is a happy person",
    "That is a happy dog",
    "That is a very happy person",
    "Today is a sunny day"
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)  # [4, 4]
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "APK": 250075, | |
| "APM": 250076, | |
| "Agricultural Census": 250047, | |
| "Agricultural Census 2013": 250035, | |
| "Agricultural Census 2023": 250033, | |
| "BPP": 250039, | |
| "BPS": 250408, | |
| "CBR": 250229, | |
| "CIF": 250093, | |
| "CPI": 250006, | |
| "City Minimum Wage": 250020, | |
| "DHS": 250029, | |
| "Decent Living Needs Survey": 250051, | |
| "Domestic Investment": 250064, | |
| "EYS": 250083, | |
| "Economic Census": 250046, | |
| "Environmental Quality Index": 250058, | |
| "Expected Years of Schooling": 250084, | |
| "FOB": 250091, | |
| "Farmer's Terms of Trade": 250009, | |
| "Foreign Investment": 250066, | |
| "GRDP": 250003, | |
| "GVA": 250080, | |
| "Gender Development Index": 250060, | |
| "Gender Empowerment Measure": 250062, | |
| "Gross Value Added": 250081, | |
| "HDI": 250023, | |
| "Happiness Index": 250055, | |
| "IBS": 250072, | |
| "IDG": 250061, | |
| "IHK": 250005, | |
| "IKD": 250052, | |
| "IKH": 250054, | |
| "IKLH": 250057, | |
| "IMK": 250012, | |
| "IMR": 250232, | |
| "IPG": 250059, | |
| "IPH": 250007, | |
| "IPKP": 250053, | |
| "IPM": 250022, | |
| "ITB": 250011, | |
| "ITK": 250010, | |
| "Indonesia Democracy Index": 250056, | |
| "Intercensal Population Survey": 250027, | |
| "KBKI": 250069, | |
| "KBLI": 250067, | |
| "KSK": 250077, | |
| "LPE": 250013, | |
| "LTN": 250037, | |
| "LTT": 250036, | |
| "Large and Medium Industries": 250073, | |
| "Life Expectancy": 250082, | |
| "MMR": 250235, | |
| "MYS": 250085, | |
| "Mean Years of Schooling": 250086, | |
| "Micro and Small Industries": 250074, | |
| "NTP": 250008, | |
| "NTUP": 250038, | |
| "National Labor Force Survey": 250041, | |
| "National Socio-Economic Survey": 250043, | |
| "Open Unemployment Rate": 250016, | |
| "PCL": 250071, | |
| "PDB": 250004, | |
| "PDRB": 250002, | |
| "PMA": 250065, | |
| "PMDN": 250063, | |
| "PML": 250078, | |
| "PODES": 250048, | |
| "Population Census": 250045, | |
| "Population Census 2020": 250025, | |
| "Provincial Minimum Wage": 250017, | |
| "Purchasing Power Parity": 250079, | |
| "Regency Minimum Wage": 250019, | |
| "SDGI": 250030, | |
| "SDKI": 250028, | |
| "SKL": 250050, | |
| "SKTNP": 250044, | |
| "SP2020": 250024, | |
| "ST2013": 250034, | |
| "ST2023": 250032, | |
| "SUPAS": 250026, | |
| "SUTAS": 250031, | |
| "Sakernas": 250040, | |
| "Standard Classification of Indonesian Business Fields": 250068, | |
| "Standard Classification of Indonesian Goods": 250070, | |
| "Statistics Indonesia": 250409, | |
| "Susenas": 250042, | |
| "TFR": 250239, | |
| "TPK": 250014, | |
| "TPT": 250015, | |
| "UMK": 250018, | |
| "UMR": 250021, | |
| "Village Potential Statistics": 250049, | |
| "aceh": 250365, | |
| "administered price inflation": 250113, | |
| "agricultural subsector": 250290, | |
| "air bersih": 250245, | |
| "akses internet": 250316, | |
| "angka kelahiran kasar": 250227, | |
| "angka kematian bayi": 250230, | |
| "angka kematian ibu": 250233, | |
| "angka melek huruf": 250269, | |
| "angka partisipasi sekolah": 250271, | |
| "angka putus sekolah": 250273, | |
| "angkatan kerja": 250125, | |
| "babel": 250373, | |
| "balance of payments": 250097, | |
| "banten": 250380, | |
| "bengkulu": 250371, | |
| "birth": 250222, | |
| "blok sensus": 250164, | |
| "bps kabupaten": 250412, | |
| "bps kota": 250414, | |
| "bps provinsi": 250410, | |
| "bukan angkatan kerja": 250128, | |
| "business field": 250136, | |
| "catalog": 250348, | |
| "catalogue": 250349, | |
| "census": 250145, | |
| "census block": 250165, | |
| "census enumerator": 250155, | |
| "census taker": 250154, | |
| "cif": 250092, | |
| "city statistics office": 250415, | |
| "clean water": 250246, | |
| "community health center": 250265, | |
| "constant price": 250178, | |
| "consumption pattern": 250325, | |
| "core inflation": 250109, | |
| "corn production": 250281, | |
| "correlation": 250362, | |
| "crime": 250337, | |
| "crime rate": 250340, | |
| "crude birth rate": 250228, | |
| "current price": 250180, | |
| "data processing": 250157, | |
| "death": 250225, | |
| "deflasi": 250104, | |
| "deflation": 250105, | |
| "demography": 250203, | |
| "dependency ratio": 250217, | |
| "desa": 250404, | |
| "diseminasi": 250158, | |
| "disguised unemployment": 250122, | |
| "dissemination": 250159, | |
| "district": 250403, | |
| "drinking water": 250247, | |
| "dropout rate": 250274, | |
| "economic growth": 250182, | |
| "economic sector": 250137, | |
| "education": 250268, | |
| "ekspor": 250087, | |
| "enumerator": 250152, | |
| "environment": 250309, | |
| "environmental quality": 250311, | |
| "estate crops": 250297, | |
| "estimasi": 250173, | |
| "estimate": 250175, | |
| "estimation": 250174, | |
| "expenditure group": 250327, | |
| "expenditure inequality": 250201, | |
| "external debt": 250099, | |
| "fasilitas kesehatan": 250262, | |
| "fertility": 250223, | |
| "fertility rate": 250237, | |
| "fishery": 250303, | |
| "fob": 250090, | |
| "food crops": 250294, | |
| "food insecurity": 250335, | |
| "forestry": 250301, | |
| "garis kemiskinan": 250185, | |
| "gini coefficient": 250197, | |
| "gini ratio": 250196, | |
| "gizi": 250253, | |
| "gizi buruk": 250257, | |
| "gorontalo": 250392, | |
| "harga berlaku": 250179, | |
| "harga diatur pemerintah": 250112, | |
| "harga konstan": 250177, | |
| "harvested area": 250286, | |
| "head of household": 250315, | |
| "health facility": 250263, | |
| "health workers": 250250, | |
| "highest educational attainment": 250276, | |
| "hiperinflasi": 250106, | |
| "home ownership": 250319, | |
| "horticulture": 250292, | |
| "hortikultura": 250291, | |
| "household": 250313, | |
| "household consumption": 250321, | |
| "housing condition": 250329, | |
| "hs code": 250089, | |
| "hyperinflation": 250107, | |
| "ijazah tertinggi": 250275, | |
| "immunization": 250252, | |
| "impor": 250088, | |
| "imunisasi": 250251, | |
| "in-migration": 250209, | |
| "indeks kedalaman kemiskinan": 250190, | |
| "indeks keparahan kemiskinan": 250192, | |
| "indicator": 250351, | |
| "indikator": 250350, | |
| "industri pengolahan": 250304, | |
| "inequality": 250199, | |
| "infant mortality rate": 250231, | |
| "inflasi": 250102, | |
| "inflasi inti": 250108, | |
| "inflasi pangan bergejolak": 250110, | |
| "inflation": 250103, | |
| "internet access": 250317, | |
| "investasi": 250100, | |
| "investment": 250101, | |
| "jabar": 250376, | |
| "jakarta": 250375, | |
| "jambi": 250370, | |
| "jateng": 250377, | |
| "jatim": 250378, | |
| "kabupaten": 250398, | |
| "kalbar": 250383, | |
| "kalsel": 250385, | |
| "kalteng": 250384, | |
| "kaltim": 250386, | |
| "kalut": 250387, | |
| "kecamatan": 250401, | |
| "kehutanan": 250300, | |
| "kejahatan": 250336, | |
| "kelahiran": 250221, | |
| "kelompok pengeluaran": 250326, | |
| "kelurahan": 250406, | |
| "kematian": 250224, | |
| "kemiskinan": 250183, | |
| "kepala rumah tangga": 250314, | |
| "kepemilikan rumah": 250318, | |
| "kependudukan": 250202, | |
| "kepri": 250369, | |
| "kerangka sampel": 250162, | |
| "kerawanan pangan": 250334, | |
| "kesehatan": 250242, | |
| "ketimpangan": 250198, | |
| "ketimpangan pengeluaran": 250200, | |
| "kondisi perumahan": 250328, | |
| "konsumsi rumah tangga": 250320, | |
| "korban kejahatan": 250338, | |
| "korelasi": 250361, | |
| "kualitas lingkungan": 250310, | |
| "kuesioner": 250148, | |
| "labor force": 250126, | |
| "labour force": 250127, | |
| "lampung": 250374, | |
| "lapangan usaha": 250135, | |
| "lingkungan": 250308, | |
| "literacy rate": 250270, | |
| "livestock": 250299, | |
| "luas panen": 250285, | |
| "maize production": 250282, | |
| "malnutrition": 250258, | |
| "maluku": 250394, | |
| "malut": 250395, | |
| "manpower": 250131, | |
| "manufacturing industry": 250305, | |
| "margin of error": 250176, | |
| "maternal mortality rate": 250234, | |
| "mean": 250354, | |
| "median": 250355, | |
| "medical personnel": 250249, | |
| "metadata": 250160, | |
| "migrasi keluar": 250210, | |
| "migrasi masuk": 250208, | |
| "migrasi neto": 250212, | |
| "migration": 250207, | |
| "minimum wage": 250134, | |
| "mitra statistik": 250344, | |
| "modus": 250356, | |
| "mortality": 250226, | |
| "mtm": 250115, | |
| "municipality": 250400, | |
| "neraca pembayaran": 250096, | |
| "neraca perdagangan": 250094, | |
| "net migration": 250213, | |
| "nilai tambah": 250306, | |
| "ntb": 250381, | |
| "ntt": 250382, | |
| "nutrition": 250254, | |
| "nutritional status": 250256, | |
| "out-migration": 250211, | |
| "outside labor force": 250129, | |
| "palm oil production": 250284, | |
| "papbar": 250397, | |
| "papua": 250396, | |
| "pendidikan": 250267, | |
| "penduduk miskin": 250187, | |
| "penduduk usia produktif": 250218, | |
| "pengangguran": 250119, | |
| "pengangguran terselubung": 250121, | |
| "pengeluaran per kapita": 250322, | |
| "pengolahan data": 250156, | |
| "people below poverty line": 250189, | |
| "per capita expenditure": 250323, | |
| "perikanan": 250302, | |
| "perkebunan": 250295, | |
| "pertanian": 250277, | |
| "pertumbuhan ekonomi": 250181, | |
| "peternakan": 250298, | |
| "petugas sensus": 250153, | |
| "piramida penduduk": 250205, | |
| "plantation": 250296, | |
| "pola konsumsi": 250324, | |
| "poor population": 250188, | |
| "populasi": 250171, | |
| "population": 250172, | |
| "population pyramid": 250206, | |
| "population studies": 250204, | |
| "poverty": 250184, | |
| "poverty gap index": 250191, | |
| "poverty line": 250186, | |
| "poverty severity index": 250193, | |
| "primary sector": 250139, | |
| "productive age population": 250219, | |
| "productivity": 250288, | |
| "produksi jagung": 250280, | |
| "produksi kelapa sawit": 250283, | |
| "produksi padi": 250278, | |
| "produktivitas": 250287, | |
| "provincial statistics office": 250411, | |
| "publication": 250347, | |
| "publikasi": 250346, | |
| "puskesmas": 250264, | |
| "questionnaire": 250149, | |
| "rasio gini": 250195, | |
| "rasio jenis kelamin": 250240, | |
| "rasio ketergantungan": 250216, | |
| "regency": 250399, | |
| "regency statistics office": 250413, | |
| "regresi": 250363, | |
| "regression": 250364, | |
| "responden": 250150, | |
| "respondent": 250151, | |
| "riau": 250368, | |
| "rice production": 250279, | |
| "rumah sakit": 250266, | |
| "rumah tangga": 250312, | |
| "sampel": 250169, | |
| "sample": 250170, | |
| "sampling frame": 250163, | |
| "sanitasi": 250243, | |
| "sanitation": 250244, | |
| "school participation rate": 250272, | |
| "secondary sector": 250141, | |
| "sektor primer": 250138, | |
| "sektor sekunder": 250140, | |
| "sektor tersier": 250142, | |
| "sensus": 250144, | |
| "setengah pengangguran": 250124, | |
| "sex ratio": 250241, | |
| "source of drinking water": 250331, | |
| "source of lighting": 250333, | |
| "squared poverty gap index": 250194, | |
| "standar deviasi": 250357, | |
| "standard deviation": 250358, | |
| "statistical metadata": 250161, | |
| "statistical partner": 250345, | |
| "statistical working area": 250167, | |
| "statistician": 250343, | |
| "statistics": 250341, | |
| "statistisi": 250342, | |
| "status gizi": 250255, | |
| "stunting": 250260, | |
| "subdistrict": 250402, | |
| "subsektor pertanian": 250289, | |
| "sulbar": 250393, | |
| "sulsel": 250390, | |
| "sulteng": 250389, | |
| "sultra": 250391, | |
| "sulut": 250388, | |
| "sumbar": 250367, | |
| "sumber air minum": 250330, | |
| "sumber penerangan": 250332, | |
| "sumsel": 250372, | |
| "sumut": 250366, | |
| "survei": 250146, | |
| "survey": 250147, | |
| "tanaman pangan": 250293, | |
| "tenaga kerja": 250130, | |
| "tenaga medis": 250248, | |
| "tertiary sector": 250143, | |
| "tingkat fertilitas": 250236, | |
| "tingkat pengangguran": 250117, | |
| "total fertility rate": 250238, | |
| "trade balance": 250095, | |
| "underemployment": 250123, | |
| "undernutrition": 250259, | |
| "unemployment": 250120, | |
| "unemployment rate": 250118, | |
| "upah minimum": 250133, | |
| "urban village": 250407, | |
| "urbanisasi": 250214, | |
| "urbanization": 250215, | |
| "utang luar negeri": 250098, | |
| "value added": 250307, | |
| "variabel": 250352, | |
| "variable": 250353, | |
| "variance": 250360, | |
| "varians": 250359, | |
| "victim of crime": 250339, | |
| "village": 250405, | |
| "volatile food inflation": 250111, | |
| "wasting": 250261, | |
| "wilayah kerja statistik": 250166, | |
| "wilkerstat": 250168, | |
| "workforce": 250132, | |
| "working age population": 250220, | |
| "yogyakarta": 250379, | |
| "yoy": 250114, | |
| "ytd": 250116 | |
| } | |