"""
HuggingFace Space - ESS Variable Classification Demo
Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
Developed by Sikt - Norwegian Agency for Shared Services in Education and Research
"""
| import gradio as gr |
| from transformers import pipeline |
|
|
| |
# Hugging Face Hub identifier of the fine-tuned checkpoint served by this demo.
MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"

# Build the text-classification pipeline once, at import time, so every
# request handled by classify_text reuses the same pipeline object.
classifier = pipeline("text-classification", model=MODEL_NAME)
|
|
| |
# Named brand colours as hex strings. The same hex values are also written
# out literally in the stylesheet and inline HTML of this file, so update
# those by hand when a colour changes here.
SIKT_COLORS = {
    "amaranth": "#ee3243",
    "meteorite": "#331c6c",
    "selago": "#f3f1fe",
}
|
|
| |
# Short human-readable description for each category label. classify_text
# looks the predicted label up here with an exact (case-sensitive) match and
# appends the description to its output when a key is found.
# NOTE(review): keys presumably mirror the model's label strings - confirm
# against the model configuration.
CATEGORY_INFO = {
    "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)": "Demographics, population statistics, age, gender",
    "ECONOMICS": "Economic issues, finance, income, wealth",
    "EDUCATION": "Education, schooling, qualifications",
    "HEALTH": "Healthcare, medical services, health satisfaction",
    "POLITICS": "Political systems, trust in government, parliament",
    "SOCIETY AND CULTURE": "Social issues, cultural topics, religion",
    "LABOUR AND EMPLOYMENT": "Work, occupation, employment status",
    "PSYCHOLOGY": "Mental health, psychological wellbeing",
    "HOUSING AND LAND USE": "Housing conditions, residential environment",
    "NATURAL ENVIRONMENT": "Environmental concerns, climate change",
    "LAW, CRIME AND LEGAL SYSTEMS": "Justice, crime, legal matters",
    "MEDIA, COMMUNICATION AND LANGUAGE": "Media use, communication patterns",
    "SOCIAL STRATIFICATION AND GROUPINGS": "Social class, inequality, social groups",
    "SOCIAL WELFARE POLICY AND SYSTEMS": "Social benefits, welfare services",
    "TRANSPORT AND TRAVEL": "Transportation, mobility, travel patterns",
    "TRADE, INDUSTRY AND MARKETS": "Business, commerce, markets",
    "SCIENCE AND TECHNOLOGY": "Scientific advancement, technology use",
    "HISTORY": "Historical events, memory, heritage",
    "OTHER": "General or uncategorized topics",
}
|
|
def classify_text(text):
    """Classify a survey question or variable description.

    Args:
        text: Free-text input from the UI. ``None`` and blank or
            whitespace-only values are treated as "no input".

    Returns:
        A Markdown string with the predicted category and the confidence as
        a percentage, followed by the category description when the label has
        an entry in ``CATEGORY_INFO``. For missing or blank input, a short
        prompt asking the user to enter text is returned instead.
    """
    # Check for None before calling .strip(): a missing value would otherwise
    # raise AttributeError instead of producing the friendly prompt.
    if text is None or not text.strip():
        return "Please enter some text to classify."

    # truncation=True is forwarded to the tokenizer so that a very long paste
    # is cut to the model's maximum sequence length rather than failing
    # inside the model.
    result = classifier(text, truncation=True)[0]
    label = result["label"]
    score = result["score"]

    sections = [
        f"**Category:** {label}\n\n",
        f"**Confidence:** {score:.2%}\n\n",
    ]

    # Only labels with an entry in CATEGORY_INFO get a description line.
    description = CATEGORY_INFO.get(label)
    if description is not None:
        sections.append(f"**Description:** {description}")

    return "".join(sections)
|
|
| |
# Clickable sample inputs for the demo. Each entry is a one-element list: one
# value per input component, and the interface has a single textbox.
# The headings below are editorial groupings by apparent theme; they are not
# verified model predictions.
examples = [
    # Education
    ["What is the highest level of education you have successfully completed?"],
    ["What is the highest level of education your mother successfully completed?"],
    ["How many years of full-time education have you completed?"],

    # Politics and institutional trust
    ["Which party did you vote for in the last national election?"],
    ["Trust in country's parliament"],
    ["How satisfied are you with the way democracy works in your country?"],
    ["How much do you trust the legal system?"],

    # Health
    ["How satisfied are you with the healthcare system?"],
    ["Which health problems that you had in the last 12 months hampered you in your daily activities?"],
    ["How is your health in general - very good, good, fair, bad, or very bad?"],

    # Labour and employment
    ["What best describes what you have been doing for the last 7 days - in paid work?"],
    ["Which description best describes the sort of work your mother did when you were 14?"],
    ["How many hours do you normally work per week in your main job?"],
    ["Are you a member of a trade union or similar organization?"],

    # Religion and values
    ["How often do you pray apart from at religious services?"],
    ["How important is it to always behave properly and avoid doing anything people would say is wrong?"],
    ["Do you consider yourself as belonging to any particular religion or denomination?"],

    # Demography
    ["What is your age?"],
    ["What is your gender?"],
    ["What is your current legal marital status?"],
    ["In which country were you born?"],

    # Household income
    ["Which of the descriptions on this card comes closest to how you feel about your household's income nowadays?"],
    ["What is your household's total net income from all sources?"],

    # Wellbeing
    ["Taking all things together, how happy would you say you are?"],
    ["Have you felt depressed or sad in the last two weeks?"],
    ["How often do you feel stressed?"],

    # Climate and environment
    ["How worried are you about climate change?"],
    ["To what extent do you think climate change is caused by human activity?"],

    # Crime and safety
    ["How safe do you feel walking alone at night in your local area?"],
    ["Have you or a member of your household been a victim of burglary or assault in the last 5 years?"],

    # Media use
    ["How much time do you spend watching television on an average weekday?"],
    ["How often do you use the internet for news?"],

    # Social position and discrimination
    ["In society there are groups which tend to be towards the top and groups which tend to be towards the bottom. Where would you place yourself?"],
    ["Do you belong to any discriminated group in this country?"],

    # Housing
    ["Do you rent or own your accommodation?"],
    ["How many rooms do you have for your household's use only?"],

    # Welfare and redistribution
    ["Should the government reduce income differences?"],
    ["How satisfied are you with the state of social benefits in your country?"],

    # Transport
    ["How long does your daily commute to work take?"],
    ["What is your main mode of transportation?"],

    # Science and technology
    ["To what extent do you think scientific advances benefit society?"],
    ["How often do you use a smartphone or tablet?"],
]
|
|
| |
# Stylesheet passed to the interface through its ``css`` argument. The hex
# colours are written out literally here; they match the values in
# SIKT_COLORS, plus #d62839 for the primary button's hover state.
custom_css = """
.gradio-container {
font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, sans-serif;
}
h1 {
color: #331c6c !important;
}
.header-logo {
display: flex;
align-items: center;
gap: 1rem;
margin-bottom: 1rem;
}
button.primary {
background-color: #ee3243 !important;
border-color: #ee3243 !important;
}
button.primary:hover {
background-color: #d62839 !important;
border-color: #d62839 !important;
}
.tabs {
border-color: #331c6c !important;
}
footer {
background-color: #f3f1fe !important;
}
"""
|
|
| |
# Assemble the demo UI: one textbox in, one Markdown panel out, wired to
# classify_text, with the sample questions and the custom stylesheet attached.
# The multi-line Markdown/HTML strings are deliberately flush-left: leading
# whitespace inside them is part of the text, and lines indented by four or
# more spaces would be rendered as code blocks.
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a survey question or variable description...",
        label="Survey Question",
    ),
    outputs=gr.Markdown(label="Classification Result"),
    title="🔍 ESS Variable Classification",
    # Header with the logo and attribution, then a short summary of the tool.
    description="""
<div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem;">
<img src="https://cdn.brandfetch.io/id9VCyV64w/theme/dark/logo.svg?c=1bxid64Mup7aczewSAYMX"
alt="Sikt Logo" style="height: 40px;">
<div>
<p style="margin: 0; color: #331c6c; font-size: 1.1em; font-weight: 500;">
Developed by <strong>Sikt</strong> – Norwegian Agency for Shared Services in Education and Research
</p>
</div>
</div>

Automatically classify European Social Survey (ESS) questions into **19 subject categories**.

This AI model is fine-tuned from XLM-RoBERTa-Base and achieves **83.8% accuracy** on the test set.
""",
    examples=examples,
    # Longer background text: what the tool is for, model details, and contact.
    article="""
---

### About This Tool

This classifier helps researchers and data managers organize survey variables by automatically
categorizing them into subject areas. The model was trained on European Social Survey metadata
and can classify questions into categories including:

- **Education** • **Politics** • **Health** • **Labour & Employment**
- **Society & Culture** • **Economics** • **Psychology** • **Demographics**
- And 11 more categories

### Technical Details

- **Base Model:** [XLM-RoBERTa-Base](https://huggingface.co/FacebookAI/xlm-roberta-base) (125M parameters)
- **Fine-tuned Model:** [benjaminBeuster/xlm-roberta-base-ess-classification](https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification)
- **Performance:** 83.8% accuracy | F1: 0.796 (weighted) | 105 test samples
- **Training Data:** [ESS Classification Dataset](https://huggingface.co/datasets/benjaminBeuster/ess_classification)

### About Sikt

[Sikt](https://sikt.no) – Norwegian Agency for Shared Services in Education and Research
provides digital infrastructure and services for research and education in Norway.

---

<div style="text-align: center; padding: 1rem; background-color: #f3f1fe; border-radius: 8px; margin-top: 1rem;">
<p style="color: #331c6c; margin: 0;">
Questions or feedback? Visit <a href="https://sikt.no" style="color: #ee3243; text-decoration: none; font-weight: 600;">sikt.no</a>
</p>
</div>
""",
    theme=gr.themes.Soft(
        primary_hue="red",
        secondary_hue="purple",
    ),
    css=custom_css,
)
|
|
# Launch the app only when this file is executed as a script, so that merely
# importing the module does not start it.
if __name__ == "__main__":
    demo.launch()
|
|