Spaces:
Running
Running
changed to tinker api and merged movies and groceries
Browse files- Dockerfile +1 -1
- src/streamlit_app.py +335 -193
Dockerfile
CHANGED
|
@@ -17,4 +17,4 @@ EXPOSE 8501
|
|
| 17 |
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 19 |
|
| 20 |
-
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
|
| 17 |
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 19 |
|
| 20 |
+
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--", "--mode", "mixed"]
|
src/streamlit_app.py
CHANGED
|
@@ -1,21 +1,23 @@
|
|
| 1 |
"""
|
| 2 |
Streamlit App: AI Product Willingness User Study
|
| 3 |
=================================================
|
| 4 |
-
Run locally:
|
| 5 |
streamlit run src/streamlit_app.py -- --category groceries
|
| 6 |
streamlit run src/streamlit_app.py -- --category groceries --debug
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
On HuggingFace Spaces, set these environment variables in Space Settings → Variables:
|
| 9 |
HF_TOKEN - HuggingFace token
|
| 10 |
TINKER_API_KEY - Tinker AI API key
|
| 11 |
-
TINKER_MODEL_PATH - Tinker sampler checkpoint path
|
| 12 |
DATASET_REPO_ID - HuggingFace dataset repo to upload results
|
| 13 |
-
CATEGORY - groceries | books | movies | health (
|
|
|
|
| 14 |
DEBUG_MODE - "true" to skip validation (optional)
|
| 15 |
"""
|
| 16 |
|
| 17 |
-
import asyncio
|
| 18 |
-
import concurrent.futures
|
| 19 |
import csv
|
| 20 |
import json
|
| 21 |
import os
|
|
@@ -32,33 +34,45 @@ import streamlit as st
|
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from filelock import FileLock
|
| 34 |
from huggingface_hub import HfApi
|
| 35 |
-
from openai import AsyncOpenAI
|
| 36 |
|
| 37 |
load_dotenv()
|
| 38 |
|
| 39 |
# ---------------------------------------------------------------------------
|
| 40 |
-
# CLI args
|
| 41 |
# ---------------------------------------------------------------------------
|
| 42 |
import argparse
|
| 43 |
parser = argparse.ArgumentParser(add_help=False)
|
| 44 |
parser.add_argument("--category", choices=["books", "groceries", "movies", "health"], default=None)
|
|
|
|
| 45 |
parser.add_argument("--debug", action="store_true", default=False)
|
| 46 |
cli_args, _ = parser.parse_known_args()
|
| 47 |
|
| 48 |
# ---------------------------------------------------------------------------
|
| 49 |
-
# Config
|
| 50 |
# ---------------------------------------------------------------------------
|
| 51 |
-
|
|
|
|
| 52 |
DEBUG_MODE = os.getenv("DEBUG_MODE", "").lower() == "true" or cli_args.debug
|
| 53 |
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "your-username/product-study")
|
| 54 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 64 |
DATA_DIR = os.path.join(BASE_DIR, "data")
|
|
@@ -67,28 +81,28 @@ os.makedirs(DATA_DIR, exist_ok=True)
|
|
| 67 |
os.makedirs(ANNOTATIONS_DIR, exist_ok=True)
|
| 68 |
|
| 69 |
CATEGORY_TO_HF = {
|
| 70 |
-
"books":
|
| 71 |
"groceries": "ehejin/amazon_Grocery_and_Gourmet_Food",
|
| 72 |
-
"movies":
|
| 73 |
-
"health":
|
| 74 |
}
|
| 75 |
CATEGORY_DISPLAY = {
|
| 76 |
-
"books":
|
| 77 |
"groceries": "Grocery Products",
|
| 78 |
-
"movies":
|
| 79 |
-
"health":
|
| 80 |
}
|
|
|
|
| 81 |
FAMILIARITY_USED_LABEL = {
|
| 82 |
-
"books":
|
| 83 |
-
"movies":
|
| 84 |
"groceries": "Used it before",
|
| 85 |
-
"health":
|
| 86 |
}
|
| 87 |
|
| 88 |
PRODUCTS_PER_USER = 5
|
| 89 |
MIN_TURNS = 3
|
| 90 |
MAX_TURNS = 10
|
| 91 |
-
TEST_SUBSET_SIZE = 100 # only use first 100 items from test split
|
| 92 |
|
| 93 |
# Familiarity values that trigger a product swap
|
| 94 |
SWAP_FAMILIARITY = {"Purchased it before"}
|
|
@@ -114,32 +128,50 @@ WILLINGNESS_LABELS = {
|
|
| 114 |
}
|
| 115 |
WILLINGNESS_CHOICES = [f"{v} ({k})" for k, v in WILLINGNESS_LABELS.items()]
|
| 116 |
|
|
|
|
| 117 |
# ---------------------------------------------------------------------------
|
| 118 |
-
#
|
| 119 |
# ---------------------------------------------------------------------------
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
|
|
|
|
|
|
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
@st.cache_resource
|
| 130 |
-
def download_and_cache_dataset():
|
| 131 |
-
"""Download test split
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
| 134 |
return
|
| 135 |
-
print(f"[DATA] Downloading {CATEGORY_TO_HF[
|
| 136 |
try:
|
| 137 |
from datasets import load_dataset
|
| 138 |
import huggingface_hub
|
| 139 |
if HF_TOKEN:
|
| 140 |
huggingface_hub.login(token=HF_TOKEN)
|
| 141 |
|
| 142 |
-
ds = load_dataset(CATEGORY_TO_HF[
|
| 143 |
|
| 144 |
def to_list(val):
|
| 145 |
if isinstance(val, list): return val
|
|
@@ -155,183 +187,244 @@ def download_and_cache_dataset():
|
|
| 155 |
"description": to_list(meta.get("description", []) if isinstance(meta, dict) else []),
|
| 156 |
"features": to_list(meta.get("features", []) if isinstance(meta, dict) else []),
|
| 157 |
"price": meta.get("price", "N/A") if isinstance(meta, dict) else "N/A",
|
| 158 |
-
"category":
|
| 159 |
}
|
| 160 |
all_items.append(item)
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
overflow = all_items[TEST_SUBSET_SIZE:]
|
| 165 |
|
| 166 |
-
with open(
|
| 167 |
json.dump(primary, f, indent=2)
|
| 168 |
-
with open(
|
| 169 |
json.dump(overflow, f, indent=2)
|
| 170 |
|
| 171 |
-
print(f"[DATA]
|
| 172 |
except Exception as e:
|
| 173 |
-
print(f"[DATA] ERROR downloading
|
| 174 |
raise
|
| 175 |
|
| 176 |
|
| 177 |
@st.cache_resource
|
| 178 |
-
def load_primary_dataset():
|
| 179 |
-
with open(
|
| 180 |
return json.load(f)
|
| 181 |
|
| 182 |
|
| 183 |
@st.cache_resource
|
| 184 |
-
def load_overflow_dataset():
|
| 185 |
-
|
|
|
|
| 186 |
return []
|
| 187 |
-
with open(
|
| 188 |
return json.load(f)
|
| 189 |
|
| 190 |
|
| 191 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
"""
|
| 193 |
-
Atomically assign
|
| 194 |
-
Drains the return queue first
|
| 195 |
-
|
| 196 |
-
|
| 197 |
"""
|
| 198 |
-
items = load_primary_dataset()
|
| 199 |
total = len(items)
|
| 200 |
-
lock = FileLock(
|
| 201 |
-
with lock:
|
| 202 |
-
# Load return queue
|
| 203 |
-
return_queue = []
|
| 204 |
-
if os.path.exists(RETURN_QUEUE_PATH):
|
| 205 |
-
with open(RETURN_QUEUE_PATH, "r") as f:
|
| 206 |
-
try:
|
| 207 |
-
return_queue = json.load(f)
|
| 208 |
-
except Exception:
|
| 209 |
-
return_queue = []
|
| 210 |
-
|
| 211 |
-
# Load counter
|
| 212 |
-
counter = 0
|
| 213 |
-
if os.path.exists(COUNTER_PATH):
|
| 214 |
-
with open(COUNTER_PATH, "r") as f:
|
| 215 |
-
counter = int(f.read().strip() or "0")
|
| 216 |
|
|
|
|
|
|
|
|
|
|
| 217 |
assigned = []
|
|
|
|
| 218 |
for _ in range(n):
|
| 219 |
if return_queue:
|
| 220 |
-
# Prioritise returned products so they still get reviewed
|
| 221 |
assigned.append(return_queue.pop(0))
|
| 222 |
-
elif counter < total:
|
| 223 |
-
assigned.append(items[counter])
|
| 224 |
-
counter += 1
|
| 225 |
else:
|
| 226 |
-
#
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
# Persist state
|
| 233 |
-
with open(RETURN_QUEUE_PATH, "w") as f:
|
| 234 |
-
json.dump(return_queue, f)
|
| 235 |
-
with open(COUNTER_PATH, "w") as f:
|
| 236 |
-
f.write(str(counter))
|
| 237 |
|
| 238 |
return assigned
|
| 239 |
|
| 240 |
|
| 241 |
-
def
|
| 242 |
"""
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
"""
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
with lock:
|
| 248 |
-
queue =
|
| 249 |
-
if os.path.exists(RETURN_QUEUE_PATH):
|
| 250 |
-
with open(RETURN_QUEUE_PATH, "r") as f:
|
| 251 |
-
try:
|
| 252 |
-
queue = json.load(f)
|
| 253 |
-
except Exception:
|
| 254 |
-
queue = []
|
| 255 |
-
# Avoid duplicates
|
| 256 |
if not any(p["id"] == product["id"] for p in queue):
|
| 257 |
queue.append(product)
|
| 258 |
-
|
| 259 |
-
json.dump(queue, f)
|
| 260 |
|
| 261 |
|
| 262 |
-
def get_swap_product(exclude_ids: set) -> dict | None:
|
| 263 |
"""
|
| 264 |
-
Get
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
"""
|
| 269 |
-
items
|
| 270 |
-
overflow = load_overflow_dataset()
|
| 271 |
-
total
|
| 272 |
|
| 273 |
-
lock = FileLock(
|
| 274 |
with lock:
|
| 275 |
-
counter =
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
while counter < total:
|
| 282 |
-
candidate = items[counter]
|
| 283 |
counter += 1
|
|
|
|
| 284 |
if candidate["id"] not in exclude_ids:
|
| 285 |
-
|
| 286 |
-
with open(COUNTER_PATH, "w") as f:
|
| 287 |
-
f.write(str(counter))
|
| 288 |
return candidate
|
| 289 |
|
| 290 |
-
# 2.
|
| 291 |
for p in items:
|
| 292 |
if p["id"] not in exclude_ids:
|
| 293 |
return p
|
| 294 |
|
| 295 |
-
# 3.
|
| 296 |
for p in overflow:
|
| 297 |
if p["id"] not in exclude_ids:
|
| 298 |
return p
|
| 299 |
|
| 300 |
-
return None
|
| 301 |
|
| 302 |
|
| 303 |
# ---------------------------------------------------------------------------
|
| 304 |
-
# AI client
|
| 305 |
# ---------------------------------------------------------------------------
|
| 306 |
@st.cache_resource
|
| 307 |
-
def
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
def call_model(messages: list) -> str:
|
| 315 |
-
async def _call():
|
| 316 |
-
try:
|
| 317 |
-
client = get_model_client()
|
| 318 |
-
response = await client.chat.completions.create(
|
| 319 |
-
model=MODEL_NAME,
|
| 320 |
-
messages=messages,
|
| 321 |
-
max_tokens=1000,
|
| 322 |
-
temperature=0.7,
|
| 323 |
-
top_p=0.9,
|
| 324 |
-
)
|
| 325 |
-
content = response.choices[0].message.content.strip()
|
| 326 |
-
content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
|
| 327 |
-
return content
|
| 328 |
-
except Exception as e:
|
| 329 |
-
print(f"[MODEL] Error: {e}")
|
| 330 |
-
return f"[Model error: {e}]"
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
|
| 336 |
|
| 337 |
# ---------------------------------------------------------------------------
|
|
@@ -355,10 +448,11 @@ def get_hf_api():
|
|
| 355 |
|
| 356 |
def save_and_upload(state: dict):
|
| 357 |
hf_api = get_hf_api()
|
| 358 |
-
worker_id = state.get("
|
| 359 |
submission_id = state.get("submission_id", str(uuid.uuid4()))
|
| 360 |
safe_worker = "".join(c if c.isalnum() else "_" for c in str(worker_id))
|
| 361 |
-
|
|
|
|
| 362 |
folder = os.path.join(ANNOTATIONS_DIR, safe_worker)
|
| 363 |
os.makedirs(folder, exist_ok=True)
|
| 364 |
file_path = os.path.join(folder, filename)
|
|
@@ -383,7 +477,8 @@ def upload_csv_rows(state: dict, hf_api, safe_worker: str, submission_id: str):
|
|
| 383 |
demographics = state.get("demographics", {})
|
| 384 |
products = state.get("products", [])
|
| 385 |
header = [
|
| 386 |
-
"submission_id", "
|
|
|
|
| 387 |
"age", "gender", "geographic_region", "education_level", "race",
|
| 388 |
"us_citizen", "marital_status", "religion", "religious_attendance",
|
| 389 |
"political_affiliation", "income", "political_views", "household_size", "employment_status",
|
|
@@ -400,10 +495,14 @@ def upload_csv_rows(state: dict, hf_api, safe_worker: str, submission_id: str):
|
|
| 400 |
post = prod.get("post_willingness", "")
|
| 401 |
delta = (post - pre) if isinstance(pre, int) and isinstance(post, int) else ""
|
| 402 |
row = [
|
| 403 |
-
submission_id,
|
|
|
|
|
|
|
|
|
|
| 404 |
state.get("meta", {}).get("submission_time", ""),
|
| 405 |
state.get("meta", {}).get("duration_seconds", ""),
|
| 406 |
-
|
|
|
|
| 407 |
demographics.get("age", ""), demographics.get("gender", ""),
|
| 408 |
demographics.get("geographic_region", ""), demographics.get("education_level", ""),
|
| 409 |
demographics.get("race", ""), demographics.get("us_citizen", ""),
|
|
@@ -491,8 +590,9 @@ def parse_willingness(choice_str: str) -> int:
|
|
| 491 |
return 4
|
| 492 |
|
| 493 |
|
| 494 |
-
def get_familiarity_choices():
|
| 495 |
-
|
|
|
|
| 496 |
return [
|
| 497 |
"Never heard of it",
|
| 498 |
"Heard of it, but not used/purchased",
|
|
@@ -502,7 +602,6 @@ def get_familiarity_choices():
|
|
| 502 |
|
| 503 |
|
| 504 |
def needs_swap(familiarity_val: str, pre_will_val: str) -> bool:
|
| 505 |
-
"""Return True if this product should be swapped out."""
|
| 506 |
if familiarity_val in SWAP_FAMILIARITY:
|
| 507 |
return True
|
| 508 |
if pre_will_val == WILLINGNESS_CHOICES[-1]: # "Definitely would buy (7)"
|
|
@@ -510,6 +609,26 @@ def needs_swap(familiarity_val: str, pre_will_val: str) -> bool:
|
|
| 510 |
return False
|
| 511 |
|
| 512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
# ---------------------------------------------------------------------------
|
| 514 |
# State initialisation
|
| 515 |
# ---------------------------------------------------------------------------
|
|
@@ -520,6 +639,7 @@ def make_product_slot(p: dict, was_swapped: bool = False) -> dict:
|
|
| 520 |
"description": p.get("description", []),
|
| 521 |
"features": p.get("features", []),
|
| 522 |
"price": p.get("price", "N/A"),
|
|
|
|
| 523 |
"familiarity": None,
|
| 524 |
"pre_willingness": None,
|
| 525 |
"post_willingness": None,
|
|
@@ -536,7 +656,7 @@ def make_product_slot(p: dict, was_swapped: bool = False) -> dict:
|
|
| 536 |
|
| 537 |
|
| 538 |
def init_state():
|
| 539 |
-
|
| 540 |
assigned = assign_products(PRODUCTS_PER_USER)
|
| 541 |
|
| 542 |
try:
|
|
@@ -547,12 +667,12 @@ def init_state():
|
|
| 547 |
return {
|
| 548 |
"submission_id": str(uuid.uuid4()),
|
| 549 |
"user_id": str(uuid.uuid4()),
|
| 550 |
-
"
|
| 551 |
-
"
|
| 552 |
-
"
|
| 553 |
-
"turk_submit_to": params.get("turkSubmitTo", ""),
|
| 554 |
"start_time": time.time(),
|
| 555 |
-
"
|
|
|
|
| 556 |
"demographics": {},
|
| 557 |
"products": [make_product_slot(p) for p in assigned],
|
| 558 |
"current_product_index": 0,
|
|
@@ -586,6 +706,14 @@ def inject_css():
|
|
| 586 |
}
|
| 587 |
.pc-title { font-size: 1.05rem; font-weight: 700; color: #1a1a2e; line-height: 1.35; flex: 1; }
|
| 588 |
.pc-price { font-size: 1.2rem; font-weight: 800; color: #16a34a; white-space: nowrap; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
.pc-section { margin-top: 0.5rem; }
|
| 590 |
.pc-section-title {
|
| 591 |
font-weight: 600; font-size: 0.85rem; color: #475569;
|
|
@@ -616,15 +744,20 @@ def render_product_card_html(product: dict, compact: bool = False) -> str:
|
|
| 616 |
price = product.get("price", "N/A")
|
| 617 |
description = product.get("description", [])
|
| 618 |
features = product.get("features", [])
|
|
|
|
| 619 |
price_str = f"${price}" if price and price != "N/A" and not str(price).startswith("$") else price
|
| 620 |
|
| 621 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
desc_html = ""
|
| 623 |
if description:
|
| 624 |
desc_text = " ".join(d for d in description if d)
|
| 625 |
desc_html = f'<div class="pc-section"><div class="pc-section-title">📋 Description</div><div class="pc-desc">{desc_text}</div></div>'
|
| 626 |
|
| 627 |
-
# Features: bullet points
|
| 628 |
feat_html = ""
|
| 629 |
if features:
|
| 630 |
items_html = "".join(f"<li>{feat}</li>" for feat in features if feat)
|
|
@@ -633,6 +766,7 @@ def render_product_card_html(product: dict, compact: bool = False) -> str:
|
|
| 633 |
max_h = "max-height:240px;overflow-y:auto;" if compact else ""
|
| 634 |
return f"""
|
| 635 |
<div class="product-card" style="{max_h}">
|
|
|
|
| 636 |
<div class="pc-header">
|
| 637 |
<div class="pc-title">{title}</div>
|
| 638 |
<div class="pc-price">{price_str}</div>
|
|
@@ -668,8 +802,11 @@ def render_chat_history(turns: list):
|
|
| 668 |
# ---------------------------------------------------------------------------
|
| 669 |
def screen_welcome(s):
|
| 670 |
st.markdown("# 🛒 Product Evaluation Study")
|
|
|
|
| 671 |
st.markdown(
|
| 672 |
-
f"Welcome! In this study you will evaluate **{PRODUCTS_PER_USER} {
|
|
|
|
|
|
|
| 673 |
"For each product you will:\n"
|
| 674 |
"1. Rate how familiar you are with the product\n"
|
| 675 |
"2. Rate how willing you are to buy it\n"
|
|
@@ -755,14 +892,19 @@ def screen_demographics(s):
|
|
| 755 |
def screen_product_intro(s):
|
| 756 |
idx = s["current_product_index"]
|
| 757 |
product = s["products"][idx]
|
|
|
|
|
|
|
| 758 |
render_progress(idx + 1)
|
| 759 |
st.markdown("## Product Evaluation")
|
| 760 |
st.markdown("Please read the product information carefully, then answer the two questions below.")
|
| 761 |
st.markdown(render_product_card_html(product), unsafe_allow_html=True)
|
| 762 |
|
|
|
|
|
|
|
|
|
|
| 763 |
familiarity_val = st.radio(
|
| 764 |
"How familiar are you with this product?",
|
| 765 |
-
|
| 766 |
index=None,
|
| 767 |
key=f"familiarity_{idx}_{product['id']}",
|
| 768 |
)
|
|
@@ -782,21 +924,20 @@ def screen_product_intro(s):
|
|
| 782 |
st.error("⚠️ Please rate your willingness to buy.")
|
| 783 |
return
|
| 784 |
|
| 785 |
-
familiarity_val = familiarity_val or
|
| 786 |
pre_will_val = pre_will_val or WILLINGNESS_CHOICES[3]
|
| 787 |
|
| 788 |
# Check if we need to swap this product
|
| 789 |
if needs_swap(familiarity_val, pre_will_val) and not DEBUG_MODE:
|
| 790 |
current_ids = {p["id"] for p in s["products"]}
|
| 791 |
-
replacement = get_swap_product(exclude_ids=current_ids)
|
| 792 |
if replacement:
|
| 793 |
-
# Return the rejected product to the queue so it gets reviewed by someone else
|
| 794 |
return_product_to_queue(s["products"][idx])
|
| 795 |
s["products"][idx] = make_product_slot(replacement, was_swapped=True)
|
| 796 |
st.info("We've swapped this product for a better match. Please review the new product below.")
|
| 797 |
st.rerun()
|
| 798 |
return
|
| 799 |
-
#
|
| 800 |
|
| 801 |
pre_val = parse_willingness(pre_will_val)
|
| 802 |
s["products"][idx]["familiarity"] = familiarity_val
|
|
@@ -977,7 +1118,8 @@ def screen_reflection(s):
|
|
| 977 |
"submission_time": end_time,
|
| 978 |
"duration_seconds": round(end_time - s.get("start_time", end_time), 1),
|
| 979 |
"model": MODEL_NAME,
|
| 980 |
-
"
|
|
|
|
| 981 |
}
|
| 982 |
with st.spinner("Saving your responses…"):
|
| 983 |
save_and_upload(s)
|
|
@@ -998,9 +1140,11 @@ def screen_done(s):
|
|
| 998 |
post = p.get("post_willingness", "?")
|
| 999 |
delta = p.get("willingness_delta", 0)
|
| 1000 |
arrow = "➡️" if delta == 0 else ("⬆️" if delta > 0 else "⬇️")
|
|
|
|
| 1001 |
rows.append({
|
| 1002 |
"#": i + 1,
|
| 1003 |
-
|
|
|
|
| 1004 |
"Before": WILLINGNESS_LABELS.get(pre, str(pre)),
|
| 1005 |
"After": WILLINGNESS_LABELS.get(post, str(post)),
|
| 1006 |
"Change": f"{arrow} {delta:+d}" if isinstance(delta, int) else "–",
|
|
@@ -1008,22 +1152,20 @@ def screen_done(s):
|
|
| 1008 |
import pandas as pd
|
| 1009 |
st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
|
| 1010 |
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
</form>
|
| 1026 |
-
""", unsafe_allow_html=True)
|
| 1027 |
|
| 1028 |
|
| 1029 |
# ---------------------------------------------------------------------------
|
|
|
|
| 1 |
"""
|
| 2 |
Streamlit App: AI Product Willingness User Study
|
| 3 |
=================================================
|
| 4 |
+
Run locally (single category):
|
| 5 |
streamlit run src/streamlit_app.py -- --category groceries
|
| 6 |
streamlit run src/streamlit_app.py -- --category groceries --debug
|
| 7 |
|
| 8 |
+
Run locally (mixed mode — movies + groceries):
|
| 9 |
+
streamlit run src/streamlit_app.py -- --mode mixed
|
| 10 |
+
streamlit run src/streamlit_app.py -- --mode mixed --debug
|
| 11 |
+
|
| 12 |
On HuggingFace Spaces, set these environment variables in Space Settings → Variables:
|
| 13 |
HF_TOKEN - HuggingFace token
|
| 14 |
TINKER_API_KEY - Tinker AI API key
|
|
|
|
| 15 |
DATASET_REPO_ID - HuggingFace dataset repo to upload results
|
| 16 |
+
CATEGORY - groceries | books | movies | health (single-category mode)
|
| 17 |
+
MODE - mixed (overrides CATEGORY; runs movies + groceries together)
|
| 18 |
DEBUG_MODE - "true" to skip validation (optional)
|
| 19 |
"""
|
| 20 |
|
|
|
|
|
|
|
| 21 |
import csv
|
| 22 |
import json
|
| 23 |
import os
|
|
|
|
| 34 |
from dotenv import load_dotenv
|
| 35 |
from filelock import FileLock
|
| 36 |
from huggingface_hub import HfApi
|
|
|
|
| 37 |
|
| 38 |
load_dotenv()
|
| 39 |
|
| 40 |
# ---------------------------------------------------------------------------
|
| 41 |
+
# CLI args
|
| 42 |
# ---------------------------------------------------------------------------
|
| 43 |
import argparse
|
| 44 |
parser = argparse.ArgumentParser(add_help=False)
|
| 45 |
parser.add_argument("--category", choices=["books", "groceries", "movies", "health"], default=None)
|
| 46 |
+
parser.add_argument("--mode", choices=["mixed"], default=None)
|
| 47 |
parser.add_argument("--debug", action="store_true", default=False)
|
| 48 |
cli_args, _ = parser.parse_known_args()
|
| 49 |
|
| 50 |
# ---------------------------------------------------------------------------
|
| 51 |
+
# Config
|
| 52 |
# ---------------------------------------------------------------------------
|
| 53 |
+
MODE = os.getenv("MODE") or cli_args.mode # "mixed" or None
|
| 54 |
+
CATEGORY = os.getenv("CATEGORY") or cli_args.category or "groceries" # used only in single-category mode
|
| 55 |
DEBUG_MODE = os.getenv("DEBUG_MODE", "").lower() == "true" or cli_args.debug
|
| 56 |
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "your-username/product-study")
|
| 57 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 58 |
|
| 59 |
+
TINKER_API_KEY = os.getenv("TINKER_API_KEY")
|
| 60 |
+
MODEL_NAME = "openai/gpt-oss-20b"
|
| 61 |
+
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
# Mixed-mode constants
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
# In mixed mode these two categories are always used together
|
| 66 |
+
MIXED_CATEGORIES = ["movies", "groceries"]
|
| 67 |
+
# Each category contributes this many items to the shared pool of 100
|
| 68 |
+
MIXED_SUBSET_SIZE = 50 # 50 movies + 50 groceries = 100 total
|
| 69 |
+
SINGLE_SUBSET_SIZE = 100 # legacy single-category mode
|
| 70 |
+
|
| 71 |
+
# ---------------------------------------------------------------------------
|
| 72 |
+
# Prolific config
|
| 73 |
+
# ---------------------------------------------------------------------------
|
| 74 |
+
PROLIFIC_COMPLETION_URL = "https://app.prolific.com/submissions/complete?cc=CYC7ALM1"
|
| 75 |
+
PROLIFIC_COMPLETION_CODE = "CYC7ALM1"
|
| 76 |
|
| 77 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 78 |
DATA_DIR = os.path.join(BASE_DIR, "data")
|
|
|
|
| 81 |
os.makedirs(ANNOTATIONS_DIR, exist_ok=True)
|
| 82 |
|
| 83 |
CATEGORY_TO_HF = {
|
| 84 |
+
"books": "ehejin/amazon_books",
|
| 85 |
"groceries": "ehejin/amazon_Grocery_and_Gourmet_Food",
|
| 86 |
+
"movies": "ehejin/amazon_Movies_and_TV",
|
| 87 |
+
"health": "ehejin/amazon_Health_and_Household",
|
| 88 |
}
|
| 89 |
CATEGORY_DISPLAY = {
|
| 90 |
+
"books": "Books",
|
| 91 |
"groceries": "Grocery Products",
|
| 92 |
+
"movies": "Movies & TV",
|
| 93 |
+
"health": "Health & Household Products",
|
| 94 |
}
|
| 95 |
+
# Per-product familiarity label (depends on the individual product's category)
|
| 96 |
FAMILIARITY_USED_LABEL = {
|
| 97 |
+
"books": "Read it before",
|
| 98 |
+
"movies": "Watched it before",
|
| 99 |
"groceries": "Used it before",
|
| 100 |
+
"health": "Used it before",
|
| 101 |
}
|
| 102 |
|
| 103 |
PRODUCTS_PER_USER = 5
|
| 104 |
MIN_TURNS = 3
|
| 105 |
MAX_TURNS = 10
|
|
|
|
| 106 |
|
| 107 |
# Familiarity values that trigger a product swap
|
| 108 |
SWAP_FAMILIARITY = {"Purchased it before"}
|
|
|
|
| 128 |
}
|
| 129 |
WILLINGNESS_CHOICES = [f"{v} ({k})" for k, v in WILLINGNESS_LABELS.items()]
|
| 130 |
|
| 131 |
+
|
| 132 |
# ---------------------------------------------------------------------------
|
| 133 |
+
# Helpers: per-category file paths
|
| 134 |
# ---------------------------------------------------------------------------
|
| 135 |
+
def _data_path(category: str, suffix: str) -> str:
    """Build the on-disk cache path for *category* with the given *suffix*.

    The pool size is baked into the filename so mixed-mode (50 items per
    category) and single-category (100 items) caches never collide.
    """
    pool_size = MIXED_SUBSET_SIZE if MODE == "mixed" else SINGLE_SUBSET_SIZE
    return os.path.join(DATA_DIR, f"{category}_test{pool_size}_{suffix}")


def local_data_path(category: str) -> str:
    """Path of the cached primary (first-N) item list for *category*."""
    return _data_path(category, "primary.json")


def overflow_path(category: str) -> str:
    """Path of the cached overflow (beyond-N) item list for *category*."""
    return _data_path(category, "overflow.json")


def counter_path(category: str) -> str:
    """Path of the sequential-assignment counter file for *category*."""
    return _data_path(category, "counter.txt")


def counter_lock_path(category: str) -> str:
    """Path of the file lock guarding this category's counter and queue."""
    return _data_path(category, "counter.lock")


def return_queue_path(category: str) -> str:
    """Path of the queue of products returned for reassignment."""
    return _data_path(category, "return_queue.json")
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ---------------------------------------------------------------------------
|
| 157 |
+
# Dataset loading
|
| 158 |
+
# ---------------------------------------------------------------------------
|
| 159 |
@st.cache_resource
|
| 160 |
+
def download_and_cache_dataset(category: str, subset_size: int):
|
| 161 |
+
"""Download test split from HuggingFace and cache locally."""
|
| 162 |
+
primary_path = local_data_path(category)
|
| 163 |
+
over_path = overflow_path(category)
|
| 164 |
+
if os.path.exists(primary_path):
|
| 165 |
+
print(f"[DATA] Found cached dataset for {category} at {primary_path}")
|
| 166 |
return
|
| 167 |
+
print(f"[DATA] Downloading {CATEGORY_TO_HF[category]} (test split, first {subset_size}) from HuggingFace...")
|
| 168 |
try:
|
| 169 |
from datasets import load_dataset
|
| 170 |
import huggingface_hub
|
| 171 |
if HF_TOKEN:
|
| 172 |
huggingface_hub.login(token=HF_TOKEN)
|
| 173 |
|
| 174 |
+
ds = load_dataset(CATEGORY_TO_HF[category], split="test")
|
| 175 |
|
| 176 |
def to_list(val):
|
| 177 |
if isinstance(val, list): return val
|
|
|
|
| 187 |
"description": to_list(meta.get("description", []) if isinstance(meta, dict) else []),
|
| 188 |
"features": to_list(meta.get("features", []) if isinstance(meta, dict) else []),
|
| 189 |
"price": meta.get("price", "N/A") if isinstance(meta, dict) else "N/A",
|
| 190 |
+
"category": category,
|
| 191 |
}
|
| 192 |
all_items.append(item)
|
| 193 |
|
| 194 |
+
primary = all_items[:subset_size]
|
| 195 |
+
overflow = all_items[subset_size:]
|
|
|
|
| 196 |
|
| 197 |
+
with open(primary_path, "w") as f:
|
| 198 |
json.dump(primary, f, indent=2)
|
| 199 |
+
with open(over_path, "w") as f:
|
| 200 |
json.dump(overflow, f, indent=2)
|
| 201 |
|
| 202 |
+
print(f"[DATA] {category}: cached {len(primary)} primary + {len(overflow)} overflow items.")
|
| 203 |
except Exception as e:
|
| 204 |
+
print(f"[DATA] ERROR downloading {category}: {e}")
|
| 205 |
raise
|
| 206 |
|
| 207 |
|
| 208 |
@st.cache_resource
def load_primary_dataset(category: str):
    """Load (and cache via Streamlit) the primary item list for *category*."""
    path = local_data_path(category)
    with open(path, "r") as f:
        return json.load(f)
|
| 212 |
|
| 213 |
|
| 214 |
@st.cache_resource
def load_overflow_dataset(category: str):
    """Load (and cache via Streamlit) the overflow item list for *category*.

    Returns an empty list when no overflow file has been cached yet.
    """
    try:
        with open(overflow_path(category), "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return []
|
| 221 |
|
| 222 |
|
| 223 |
+
def _ensure_datasets():
    """Download/cache every category dataset the current mode needs."""
    if MODE == "mixed":
        needed = [(cat, MIXED_SUBSET_SIZE) for cat in MIXED_CATEGORIES]
    else:
        needed = [(CATEGORY, SINGLE_SUBSET_SIZE)]
    for cat, subset in needed:
        download_and_cache_dataset(cat, subset)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
# ---------------------------------------------------------------------------
|
| 233 |
+
# Per-category counter helpers
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
def _read_counter(category: str) -> int:
    """Return the persisted assignment counter for *category* (0 if absent)."""
    try:
        with open(counter_path(category), "r") as f:
            raw = f.read().strip()
    except FileNotFoundError:
        return 0
    return int(raw or "0")


def _write_counter(category: str, value: int):
    """Persist the assignment counter for *category*."""
    with open(counter_path(category), "w") as f:
        f.write(str(value))


def _read_return_queue(category: str) -> list:
    """Return the persisted return queue for *category*.

    Yields [] when the file is missing; a file that exists but does not
    parse as JSON is also treated as an empty queue (best-effort recovery).
    """
    path = return_queue_path(category)
    if not os.path.exists(path):
        return []
    with open(path, "r") as f:
        try:
            return json.load(f)
        except Exception:
            return []


def _write_return_queue(category: str, queue: list):
    """Persist the return queue for *category*."""
    with open(return_queue_path(category), "w") as f:
        json.dump(queue, f)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
# ---------------------------------------------------------------------------
|
| 265 |
+
# Product assignment
|
| 266 |
+
# ---------------------------------------------------------------------------
|
| 267 |
+
def _assign_from_category(category: str, n: int) -> list:
    """Atomically assign n products from a single category pool.

    Order of precedence:
      1. Drain the return queue first (products handed back by swaps),
         so rejected products still get reviewed by someone.
      2. Pull sequentially from the primary pool, wrapping around
         (modulo pool size) once exhausted so late users still get items.

    All state reads/writes happen under the category's file lock so
    concurrent sessions never hand out inconsistent counters/queues.

    Raises:
        ValueError: if the primary pool for *category* is empty — previously
            this surfaced as a cryptic ZeroDivisionError from ``counter % total``.
    """
    items = load_primary_dataset(category)
    total = len(items)
    # Bug fix: guard the empty pool before taking ``counter % total``.
    if total == 0:
        raise ValueError(f"Primary dataset for category '{category}' is empty")

    lock = FileLock(counter_lock_path(category))
    with lock:
        return_queue = _read_return_queue(category)
        counter = _read_counter(category)
        assigned = []

        for _ in range(n):
            if return_queue:
                # Returned products take priority so they still get reviewed.
                assigned.append(return_queue.pop(0))
            else:
                # Wrap-around: counter mod total cycles through the pool.
                assigned.append(items[counter % total])
                counter += 1

        # Persist state while still holding the lock.
        _write_return_queue(category, return_queue)
        _write_counter(category, counter)

    return assigned
|
| 295 |
|
| 296 |
|
| 297 |
+
def assign_mixed_products(n: int = PRODUCTS_PER_USER) -> list:
    """Assign n products split across movies and groceries.

    Alternates the majority category per batch so coverage stays balanced:
        batch 1: 3 movies + 2 groceries
        batch 2: 2 movies + 3 groceries
        batch 3: 3 movies + 2 groceries ... etc.

    Bug fix: the previous check used the movies counter alone,
    ``(movies_counter // 1) % 2`` — the ``// 1`` was a no-op, and the movies
    counter advances 3, 5, 7, ... so its parity stays odd after the first
    batch and the split never alternated again.  The batch index is now
    derived from the COMBINED counters, which advance by exactly n per call.

    NOTE(review): items served from the return queue do not advance the
    counters, so under swaps the alternation is approximate — confirm this
    is acceptable for the study's balance requirements.
    """
    if n <= 0:
        return []

    # Combined counters advance by n per batch (queue draining aside),
    # so integer-dividing by n recovers the batch index; its parity
    # decides which category gets the larger share.
    batch_index = (_read_counter("movies") + _read_counter("groceries")) // n
    if batch_index % 2 == 0:
        n_movies, n_groceries = 3, 2
    else:
        n_movies, n_groceries = 2, 3

    # Clamp in case n != 5
    if n_movies + n_groceries != n:
        n_movies = n // 2
        n_groceries = n - n_movies

    movie_items = _assign_from_category("movies", n_movies)
    grocery_items = _assign_from_category("groceries", n_groceries)

    combined = movie_items + grocery_items
    random.shuffle(combined)  # mix so user doesn't see all movies then all groceries
    return combined
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def assign_products(n: int = PRODUCTS_PER_USER) -> list:
    """Entry point for product assignment.

    Dispatches to the mixed-mode assigner when MODE == "mixed"; otherwise
    falls back to the legacy single-category behaviour for CATEGORY.
    """
    if MODE != "mixed":
        # Single-category (legacy behaviour)
        return _assign_from_category(CATEGORY, n)
    return assign_mixed_products(n)
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def return_product_to_queue(product: dict):
    """Put a rejected/swapped product back so it gets reassigned.

    The product's own category decides which per-category return queue it
    joins; a product with an ``id`` already in the queue is never enqueued
    twice, and the queue file is rewritten only when something was added.
    """
    cat = product.get("category", CATEGORY)
    with FileLock(counter_lock_path(cat)):
        queue = _read_return_queue(cat)
        already_queued = any(entry["id"] == product["id"] for entry in queue)
        if not already_queued:
            queue.append(product)
            _write_return_queue(cat, queue)
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
+
def get_swap_product(exclude_ids: set, category: str) -> dict | None:
    """
    Get a replacement product for the given category.

    Search order:
      1. Next unassigned primary product (advances the persistent counter,
         wrapping around the primary list at most once).
      2. Any primary product not already held by this user.
      3. Overflow pool.

    Returns:
        A product dict, or None when every known product is excluded.
    """
    items = load_primary_dataset(category)
    overflow = load_overflow_dataset(category)
    total = len(items)

    with FileLock(counter_lock_path(category)):
        counter = _read_counter(category)

        # 1. Walk at most one full cycle of the primary list; persist the
        #    counter only when a usable candidate is found.
        for _ in range(total):
            candidate = items[counter % total]
            counter += 1
            if candidate["id"] not in exclude_ids:
                _write_counter(category, counter)
                return candidate

    # 2./3. Fall back to any primary item, then to the overflow pool.
    for pool in (items, overflow):
        for entry in pool:
            if entry["id"] not in exclude_ids:
                return entry

    return None
|
| 383 |
|
| 384 |
|
| 385 |
# ---------------------------------------------------------------------------
|
| 386 |
+
# AI client (Tinker)
|
| 387 |
# ---------------------------------------------------------------------------
|
| 388 |
@st.cache_resource
def get_tinker_clients():
    """Initialise and cache the Tinker sampling client, renderer, and tokenizer.

    Cached via st.cache_resource so the service connection is created only
    once per Streamlit process.

    Returns:
        Tuple of (sampling_client, renderer, tinker_types module).
    """
    import tinker
    from tinker import types as tinker_types
    from tinker_cookbook import renderers
    from tinker_cookbook.model_info import get_recommended_renderer_name
    from tinker_cookbook.tokenizer_utils import get_tokenizer

    service = tinker.ServiceClient()
    client = service.create_sampling_client(base_model=MODEL_NAME)

    tok = get_tokenizer(MODEL_NAME)
    renderer = renderers.get_renderer(get_recommended_renderer_name(MODEL_NAME), tok)
    return client, renderer, tinker_types
|
| 403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
+
def call_model(messages: list) -> str:
    """Sample one assistant reply from the Tinker model for the given chat messages.

    Any <think>...</think> reasoning spans are stripped from the output.
    On any failure the error is printed and a "[Model error: ...]" placeholder
    string is returned instead of raising.
    """
    try:
        from tinker_cookbook import renderers as tinker_renderers

        sampling_client, renderer, tinker_types = get_tinker_clients()

        sampling_params = tinker_types.SamplingParams(
            max_tokens=1000,
            temperature=0.7,
            stop=renderer.get_stop_sequences(),
        )
        future = sampling_client.sample(
            prompt=renderer.build_generation_prompt(messages),
            sampling_params=sampling_params,
            num_samples=1,
        )
        response = future.result()

        message, _ = renderer.parse_response(response.sequences[0].tokens)
        text = tinker_renderers.format_content_as_string(message["content"])
        # Drop hidden chain-of-thought blocks before showing text to the user.
        return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
    except Exception as e:
        print(f"[MODEL] Tinker error: {e}")
        return f"[Model error: {e}]"
|
| 428 |
|
| 429 |
|
| 430 |
# ---------------------------------------------------------------------------
|
|
|
|
| 448 |
|
| 449 |
def save_and_upload(state: dict):
|
| 450 |
hf_api = get_hf_api()
|
| 451 |
+
worker_id = state.get("prolific_pid") or state.get("user_id", "anonymous")
|
| 452 |
submission_id = state.get("submission_id", str(uuid.uuid4()))
|
| 453 |
safe_worker = "".join(c if c.isalnum() else "_" for c in str(worker_id))
|
| 454 |
+
mode_tag = state.get("mode", "single")
|
| 455 |
+
filename = f"{submission_id}_{mode_tag}.json"
|
| 456 |
folder = os.path.join(ANNOTATIONS_DIR, safe_worker)
|
| 457 |
os.makedirs(folder, exist_ok=True)
|
| 458 |
file_path = os.path.join(folder, filename)
|
|
|
|
| 477 |
demographics = state.get("demographics", {})
|
| 478 |
products = state.get("products", [])
|
| 479 |
header = [
|
| 480 |
+
"submission_id", "prolific_pid", "study_id", "session_id",
|
| 481 |
+
"submission_time", "duration_seconds", "mode", "category",
|
| 482 |
"age", "gender", "geographic_region", "education_level", "race",
|
| 483 |
"us_citizen", "marital_status", "religion", "religious_attendance",
|
| 484 |
"political_affiliation", "income", "political_views", "household_size", "employment_status",
|
|
|
|
| 495 |
post = prod.get("post_willingness", "")
|
| 496 |
delta = (post - pre) if isinstance(pre, int) and isinstance(post, int) else ""
|
| 497 |
row = [
|
| 498 |
+
submission_id,
|
| 499 |
+
state.get("prolific_pid", ""),
|
| 500 |
+
state.get("study_id", ""),
|
| 501 |
+
state.get("session_id", ""),
|
| 502 |
state.get("meta", {}).get("submission_time", ""),
|
| 503 |
state.get("meta", {}).get("duration_seconds", ""),
|
| 504 |
+
state.get("mode", "single"),
|
| 505 |
+
prod.get("category", ""), # per-product category
|
| 506 |
demographics.get("age", ""), demographics.get("gender", ""),
|
| 507 |
demographics.get("geographic_region", ""), demographics.get("education_level", ""),
|
| 508 |
demographics.get("race", ""), demographics.get("us_citizen", ""),
|
|
|
|
| 590 |
return 4
|
| 591 |
|
| 592 |
|
| 593 |
+
def get_familiarity_choices(category: str) -> list:
|
| 594 |
+
"""Return familiarity options with the correct 'used' label for this product's category."""
|
| 595 |
+
used_label = FAMILIARITY_USED_LABEL.get(category, "Used it before")
|
| 596 |
return [
|
| 597 |
"Never heard of it",
|
| 598 |
"Heard of it, but not used/purchased",
|
|
|
|
| 602 |
|
| 603 |
|
| 604 |
def needs_swap(familiarity_val: str, pre_will_val: str) -> bool:
|
|
|
|
| 605 |
if familiarity_val in SWAP_FAMILIARITY:
|
| 606 |
return True
|
| 607 |
if pre_will_val == WILLINGNESS_CHOICES[-1]: # "Definitely would buy (7)"
|
|
|
|
| 609 |
return False
|
| 610 |
|
| 611 |
|
| 612 |
+
# ---------------------------------------------------------------------------
|
| 613 |
+
# Welcome screen helpers
|
| 614 |
+
# ---------------------------------------------------------------------------
|
| 615 |
+
def study_display_name() -> str:
    """Human-readable name for what the user will evaluate."""
    return (
        "Movies & TV and Grocery Products"
        if MODE == "mixed"
        else CATEGORY_DISPLAY.get(CATEGORY, CATEGORY)
    )
|
| 620 |
+
|
| 621 |
+
|
| 622 |
+
def study_category_breakdown() -> str:
    """Extra sentence shown on the welcome screen describing the mix.

    Returns an empty string in single-category mode (nothing extra to explain).
    """
    if MODE != "mixed":
        return ""
    return (
        "You will evaluate a mix of **Movies & TV** and **Grocery Products** "
        "(roughly 2–3 of each)."
    )
|
| 630 |
+
|
| 631 |
+
|
| 632 |
# ---------------------------------------------------------------------------
|
| 633 |
# State initialisation
|
| 634 |
# ---------------------------------------------------------------------------
|
|
|
|
| 639 |
"description": p.get("description", []),
|
| 640 |
"features": p.get("features", []),
|
| 641 |
"price": p.get("price", "N/A"),
|
| 642 |
+
"category": p.get("category", CATEGORY), # ← per-product category
|
| 643 |
"familiarity": None,
|
| 644 |
"pre_willingness": None,
|
| 645 |
"post_willingness": None,
|
|
|
|
| 656 |
|
| 657 |
|
| 658 |
def init_state():
|
| 659 |
+
_ensure_datasets()
|
| 660 |
assigned = assign_products(PRODUCTS_PER_USER)
|
| 661 |
|
| 662 |
try:
|
|
|
|
| 667 |
return {
|
| 668 |
"submission_id": str(uuid.uuid4()),
|
| 669 |
"user_id": str(uuid.uuid4()),
|
| 670 |
+
"prolific_pid": params.get("PROLIFIC_PID", ""),
|
| 671 |
+
"study_id": params.get("STUDY_ID", ""),
|
| 672 |
+
"session_id": params.get("SESSION_ID", ""),
|
|
|
|
| 673 |
"start_time": time.time(),
|
| 674 |
+
"mode": MODE or "single",
|
| 675 |
+
"category": CATEGORY if MODE != "mixed" else "mixed",
|
| 676 |
"demographics": {},
|
| 677 |
"products": [make_product_slot(p) for p in assigned],
|
| 678 |
"current_product_index": 0,
|
|
|
|
| 706 |
}
|
| 707 |
.pc-title { font-size: 1.05rem; font-weight: 700; color: #1a1a2e; line-height: 1.35; flex: 1; }
|
| 708 |
.pc-price { font-size: 1.2rem; font-weight: 800; color: #16a34a; white-space: nowrap; }
|
| 709 |
+
.pc-category-badge {
|
| 710 |
+
display: inline-block;
|
| 711 |
+
font-size: 0.75rem; font-weight: 600;
|
| 712 |
+
padding: 0.15rem 0.55rem;
|
| 713 |
+
border-radius: 99px;
|
| 714 |
+
margin-bottom: 0.4rem;
|
| 715 |
+
background: #dbeafe; color: #1e40af;
|
| 716 |
+
}
|
| 717 |
.pc-section { margin-top: 0.5rem; }
|
| 718 |
.pc-section-title {
|
| 719 |
font-weight: 600; font-size: 0.85rem; color: #475569;
|
|
|
|
| 744 |
price = product.get("price", "N/A")
|
| 745 |
description = product.get("description", [])
|
| 746 |
features = product.get("features", [])
|
| 747 |
+
category = product.get("category", "")
|
| 748 |
price_str = f"${price}" if price and price != "N/A" and not str(price).startswith("$") else price
|
| 749 |
|
| 750 |
+
# Category badge — only shown in mixed mode
|
| 751 |
+
badge_html = ""
|
| 752 |
+
if MODE == "mixed" and category:
|
| 753 |
+
badge_label = CATEGORY_DISPLAY.get(category, category)
|
| 754 |
+
badge_html = f'<div class="pc-category-badge">📂 {badge_label}</div>'
|
| 755 |
+
|
| 756 |
desc_html = ""
|
| 757 |
if description:
|
| 758 |
desc_text = " ".join(d for d in description if d)
|
| 759 |
desc_html = f'<div class="pc-section"><div class="pc-section-title">📋 Description</div><div class="pc-desc">{desc_text}</div></div>'
|
| 760 |
|
|
|
|
| 761 |
feat_html = ""
|
| 762 |
if features:
|
| 763 |
items_html = "".join(f"<li>{feat}</li>" for feat in features if feat)
|
|
|
|
| 766 |
max_h = "max-height:240px;overflow-y:auto;" if compact else ""
|
| 767 |
return f"""
|
| 768 |
<div class="product-card" style="{max_h}">
|
| 769 |
+
{badge_html}
|
| 770 |
<div class="pc-header">
|
| 771 |
<div class="pc-title">{title}</div>
|
| 772 |
<div class="pc-price">{price_str}</div>
|
|
|
|
| 802 |
# ---------------------------------------------------------------------------
|
| 803 |
def screen_welcome(s):
|
| 804 |
st.markdown("# 🛒 Product Evaluation Study")
|
| 805 |
+
breakdown = study_category_breakdown()
|
| 806 |
st.markdown(
|
| 807 |
+
f"Welcome! In this study you will evaluate **{PRODUCTS_PER_USER} {study_display_name()}** products.\n\n"
|
| 808 |
+
+ (f"{breakdown}\n\n" if breakdown else "")
|
| 809 |
+
+
|
| 810 |
"For each product you will:\n"
|
| 811 |
"1. Rate how familiar you are with the product\n"
|
| 812 |
"2. Rate how willing you are to buy it\n"
|
|
|
|
| 892 |
def screen_product_intro(s):
|
| 893 |
idx = s["current_product_index"]
|
| 894 |
product = s["products"][idx]
|
| 895 |
+
product_category = product.get("category", CATEGORY)
|
| 896 |
+
|
| 897 |
render_progress(idx + 1)
|
| 898 |
st.markdown("## Product Evaluation")
|
| 899 |
st.markdown("Please read the product information carefully, then answer the two questions below.")
|
| 900 |
st.markdown(render_product_card_html(product), unsafe_allow_html=True)
|
| 901 |
|
| 902 |
+
# Use per-product familiarity choices based on the product's own category
|
| 903 |
+
familiarity_choices = get_familiarity_choices(product_category)
|
| 904 |
+
|
| 905 |
familiarity_val = st.radio(
|
| 906 |
"How familiar are you with this product?",
|
| 907 |
+
familiarity_choices,
|
| 908 |
index=None,
|
| 909 |
key=f"familiarity_{idx}_{product['id']}",
|
| 910 |
)
|
|
|
|
| 924 |
st.error("⚠️ Please rate your willingness to buy.")
|
| 925 |
return
|
| 926 |
|
| 927 |
+
familiarity_val = familiarity_val or familiarity_choices[0]
|
| 928 |
pre_will_val = pre_will_val or WILLINGNESS_CHOICES[3]
|
| 929 |
|
| 930 |
# Check if we need to swap this product
|
| 931 |
if needs_swap(familiarity_val, pre_will_val) and not DEBUG_MODE:
|
| 932 |
current_ids = {p["id"] for p in s["products"]}
|
| 933 |
+
replacement = get_swap_product(exclude_ids=current_ids, category=product_category)
|
| 934 |
if replacement:
|
|
|
|
| 935 |
return_product_to_queue(s["products"][idx])
|
| 936 |
s["products"][idx] = make_product_slot(replacement, was_swapped=True)
|
| 937 |
st.info("We've swapped this product for a better match. Please review the new product below.")
|
| 938 |
st.rerun()
|
| 939 |
return
|
| 940 |
+
# No replacement found — proceed with this product anyway
|
| 941 |
|
| 942 |
pre_val = parse_willingness(pre_will_val)
|
| 943 |
s["products"][idx]["familiarity"] = familiarity_val
|
|
|
|
| 1118 |
"submission_time": end_time,
|
| 1119 |
"duration_seconds": round(end_time - s.get("start_time", end_time), 1),
|
| 1120 |
"model": MODEL_NAME,
|
| 1121 |
+
"mode": MODE or "single",
|
| 1122 |
+
"category": CATEGORY if MODE != "mixed" else "mixed",
|
| 1123 |
}
|
| 1124 |
with st.spinner("Saving your responses…"):
|
| 1125 |
save_and_upload(s)
|
|
|
|
| 1140 |
post = p.get("post_willingness", "?")
|
| 1141 |
delta = p.get("willingness_delta", 0)
|
| 1142 |
arrow = "➡️" if delta == 0 else ("⬆️" if delta > 0 else "⬇️")
|
| 1143 |
+
cat_label = CATEGORY_DISPLAY.get(p.get("category", ""), "") if MODE == "mixed" else ""
|
| 1144 |
rows.append({
|
| 1145 |
"#": i + 1,
|
| 1146 |
+
**({"Category": cat_label} if MODE == "mixed" else {}),
|
| 1147 |
+
"Product": p.get("title", "")[:55] + ("…" if len(p.get("title", "")) > 55 else ""),
|
| 1148 |
"Before": WILLINGNESS_LABELS.get(pre, str(pre)),
|
| 1149 |
"After": WILLINGNESS_LABELS.get(post, str(post)),
|
| 1150 |
"Change": f"{arrow} {delta:+d}" if isinstance(delta, int) else "–",
|
|
|
|
| 1152 |
import pandas as pd
|
| 1153 |
st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
|
| 1154 |
|
| 1155 |
+
st.markdown("---")
|
| 1156 |
+
st.success(
|
| 1157 |
+
f"**Your completion code:** `{PROLIFIC_COMPLETION_CODE}`\n\n"
|
| 1158 |
+
"You can either click the button below to return to Prolific automatically, "
|
| 1159 |
+
"or copy the code above and paste it on the Prolific website."
|
| 1160 |
+
)
|
| 1161 |
+
st.markdown(
|
| 1162 |
+
f"""<a href="{PROLIFIC_COMPLETION_URL}" target="_self">
|
| 1163 |
+
<button style="background:#2563eb;color:white;border:none;padding:12px 28px;
|
| 1164 |
+
font-size:1rem;border-radius:6px;cursor:pointer;margin-top:8px;">
|
| 1165 |
+
✅ Return to Prolific
|
| 1166 |
+
</button></a>""",
|
| 1167 |
+
unsafe_allow_html=True,
|
| 1168 |
+
)
|
|
|
|
|
|
|
| 1169 |
|
| 1170 |
|
| 1171 |
# ---------------------------------------------------------------------------
|