Spaces:
Sleeping
Sleeping
Commit ·
0aa6a46
1
Parent(s): 6e78cc3
Fix inference action sanitization and improve mobile responsiveness
Browse files
- Sanitize LLM action output to strip empty strings and irrelevant fields,
preventing Pydantic 422 validation errors on /step endpoint
- Remove default for HF_TOKEN (per hackathon requirements)
- Improve system prompt for better structured JSON output from LLM
- Add fallback API key support (GROQ_API_KEY, API_KEY)
- Handle 422 errors gracefully in _step() with penalty reward
- Enhance mobile CSS: add 420px breakpoint, fix spotlight guide on mobile,
touch-friendly button sizing, prevent horizontal overflow
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- frontend/src/styles.css +251 -2
- inference.py +64 -14
frontend/src/styles.css
CHANGED
|
@@ -31,6 +31,8 @@ body {
|
|
| 31 |
line-height: 1.55;
|
| 32 |
-webkit-font-smoothing: antialiased;
|
| 33 |
-moz-osx-font-smoothing: grayscale;
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
.shell {
|
|
@@ -820,6 +822,13 @@ button:disabled {
|
|
| 820 |
box-shadow: none;
|
| 821 |
}
|
| 822 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 823 |
/* ── Responsive ──────────────────────────────────────────────── */
|
| 824 |
|
| 825 |
@media (max-width: 1180px) {
|
|
@@ -848,13 +857,26 @@ button:disabled {
|
|
| 848 |
|
| 849 |
@media (max-width: 760px) {
|
| 850 |
.shell {
|
| 851 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 852 |
}
|
| 853 |
|
| 854 |
.topbar,
|
| 855 |
.panel {
|
| 856 |
border-radius: var(--radius-sm);
|
| 857 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
}
|
| 859 |
|
| 860 |
.actions {
|
|
@@ -864,14 +886,22 @@ button:disabled {
|
|
| 864 |
.actions button,
|
| 865 |
.actions select {
|
| 866 |
width: 100%;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
}
|
| 868 |
|
| 869 |
.kpi-grid {
|
| 870 |
grid-template-columns: 1fr 1fr;
|
|
|
|
| 871 |
}
|
| 872 |
|
| 873 |
.med-grid {
|
| 874 |
grid-template-columns: 1fr;
|
|
|
|
| 875 |
}
|
| 876 |
|
| 877 |
.stack-two {
|
|
@@ -886,6 +916,13 @@ button:disabled {
|
|
| 886 |
left: 10px !important;
|
| 887 |
right: 10px !important;
|
| 888 |
max-width: calc(100vw - 20px) !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
}
|
| 890 |
|
| 891 |
.guide-footer,
|
|
@@ -893,9 +930,221 @@ button:disabled {
|
|
| 893 |
flex-direction: column;
|
| 894 |
}
|
| 895 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 896 |
.logs {
|
| 897 |
max-height: 200px;
|
| 898 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
}
|
| 900 |
|
| 901 |
/* ── Scrollbar ───────────────────────────────────────────────── */
|
|
|
|
| 31 |
line-height: 1.55;
|
| 32 |
-webkit-font-smoothing: antialiased;
|
| 33 |
-moz-osx-font-smoothing: grayscale;
|
| 34 |
+
-webkit-tap-highlight-color: transparent;
|
| 35 |
+
-webkit-text-size-adjust: 100%;
|
| 36 |
}
|
| 37 |
|
| 38 |
.shell {
|
|
|
|
| 822 |
box-shadow: none;
|
| 823 |
}
|
| 824 |
|
| 825 |
+
/* ── Global mobile overflow prevention ───────────────────────── */
|
| 826 |
+
|
| 827 |
+
html, body {
|
| 828 |
+
overflow-x: hidden;
|
| 829 |
+
width: 100%;
|
| 830 |
+
}
|
| 831 |
+
|
| 832 |
/* ── Responsive ──────────────────────────────────────────────── */
|
| 833 |
|
| 834 |
@media (max-width: 1180px) {
|
|
|
|
| 857 |
|
| 858 |
@media (max-width: 760px) {
|
| 859 |
.shell {
|
| 860 |
+
padding: 10px 10px 24px;
|
| 861 |
+
}
|
| 862 |
+
|
| 863 |
+
.container {
|
| 864 |
+
width: 100%;
|
| 865 |
}
|
| 866 |
|
| 867 |
.topbar,
|
| 868 |
.panel {
|
| 869 |
border-radius: var(--radius-sm);
|
| 870 |
+
padding: 14px 14px;
|
| 871 |
+
}
|
| 872 |
+
|
| 873 |
+
.topbar {
|
| 874 |
+
gap: 12px;
|
| 875 |
+
}
|
| 876 |
+
|
| 877 |
+
.topbar-right {
|
| 878 |
+
width: 100%;
|
| 879 |
+
justify-content: space-between;
|
| 880 |
}
|
| 881 |
|
| 882 |
.actions {
|
|
|
|
| 886 |
.actions button,
|
| 887 |
.actions select {
|
| 888 |
width: 100%;
|
| 889 |
+
min-height: 46px;
|
| 890 |
+
}
|
| 891 |
+
|
| 892 |
+
.layout {
|
| 893 |
+
gap: 14px;
|
| 894 |
+
margin-top: 14px;
|
| 895 |
}
|
| 896 |
|
| 897 |
.kpi-grid {
|
| 898 |
grid-template-columns: 1fr 1fr;
|
| 899 |
+
gap: 8px;
|
| 900 |
}
|
| 901 |
|
| 902 |
.med-grid {
|
| 903 |
grid-template-columns: 1fr;
|
| 904 |
+
max-height: none;
|
| 905 |
}
|
| 906 |
|
| 907 |
.stack-two {
|
|
|
|
| 916 |
left: 10px !important;
|
| 917 |
right: 10px !important;
|
| 918 |
max-width: calc(100vw - 20px) !important;
|
| 919 |
+
top: auto !important;
|
| 920 |
+
bottom: 20px !important;
|
| 921 |
+
width: calc(100vw - 20px) !important;
|
| 922 |
+
}
|
| 923 |
+
|
| 924 |
+
.spotlight-ring {
|
| 925 |
+
display: none;
|
| 926 |
}
|
| 927 |
|
| 928 |
.guide-footer,
|
|
|
|
| 930 |
flex-direction: column;
|
| 931 |
}
|
| 932 |
|
| 933 |
+
.spotlight-tooltip-footer button {
|
| 934 |
+
width: 100%;
|
| 935 |
+
}
|
| 936 |
+
|
| 937 |
+
.app-footer p {
|
| 938 |
+
font-size: 0.72rem;
|
| 939 |
+
}
|
| 940 |
+
|
| 941 |
.logs {
|
| 942 |
max-height: 200px;
|
| 943 |
}
|
| 944 |
+
|
| 945 |
+
.logs div {
|
| 946 |
+
overflow-x: auto;
|
| 947 |
+
white-space: normal;
|
| 948 |
+
word-break: break-all;
|
| 949 |
+
}
|
| 950 |
+
|
| 951 |
+
.risk-labels {
|
| 952 |
+
flex-direction: column;
|
| 953 |
+
gap: 4px;
|
| 954 |
+
}
|
| 955 |
+
|
| 956 |
+
.conditions-row {
|
| 957 |
+
gap: 6px;
|
| 958 |
+
}
|
| 959 |
+
|
| 960 |
+
.condition-tag {
|
| 961 |
+
font-size: 0.7rem;
|
| 962 |
+
padding: 3px 10px;
|
| 963 |
+
}
|
| 964 |
+
|
| 965 |
+
.budget-note {
|
| 966 |
+
font-size: 0.82rem;
|
| 967 |
+
padding: 12px 14px;
|
| 968 |
+
}
|
| 969 |
+
|
| 970 |
+
.history-list {
|
| 971 |
+
max-height: 240px;
|
| 972 |
+
}
|
| 973 |
+
|
| 974 |
+
.history-item strong {
|
| 975 |
+
word-break: break-word;
|
| 976 |
+
}
|
| 977 |
+
|
| 978 |
+
.med-card-header {
|
| 979 |
+
flex-wrap: wrap;
|
| 980 |
+
}
|
| 981 |
+
|
| 982 |
+
.med-details {
|
| 983 |
+
flex-wrap: wrap;
|
| 984 |
+
}
|
| 985 |
+
|
| 986 |
+
.beers-flags {
|
| 987 |
+
gap: 4px;
|
| 988 |
+
}
|
| 989 |
+
|
| 990 |
+
.beers-tag {
|
| 991 |
+
font-size: 0.65rem;
|
| 992 |
+
padding: 2px 8px;
|
| 993 |
+
}
|
| 994 |
+
}
|
| 995 |
+
|
| 996 |
+
/* ── Small phones (≤ 420px) ─────────────────────────────────── */
|
| 997 |
+
|
| 998 |
+
@media (max-width: 420px) {
|
| 999 |
+
.shell {
|
| 1000 |
+
padding: 6px 6px 20px;
|
| 1001 |
+
}
|
| 1002 |
+
|
| 1003 |
+
.topbar,
|
| 1004 |
+
.panel {
|
| 1005 |
+
padding: 12px 10px;
|
| 1006 |
+
border-radius: 8px;
|
| 1007 |
+
}
|
| 1008 |
+
|
| 1009 |
+
.title-wrap {
|
| 1010 |
+
min-width: 0;
|
| 1011 |
+
}
|
| 1012 |
+
|
| 1013 |
+
.title-wrap h1 {
|
| 1014 |
+
font-size: 0.95rem;
|
| 1015 |
+
overflow: hidden;
|
| 1016 |
+
text-overflow: ellipsis;
|
| 1017 |
+
white-space: nowrap;
|
| 1018 |
+
}
|
| 1019 |
+
|
| 1020 |
+
.title-wrap p {
|
| 1021 |
+
font-size: 0.72rem;
|
| 1022 |
+
}
|
| 1023 |
+
|
| 1024 |
+
.layout {
|
| 1025 |
+
gap: 10px;
|
| 1026 |
+
margin-top: 10px;
|
| 1027 |
+
}
|
| 1028 |
+
|
| 1029 |
+
.kpi-grid {
|
| 1030 |
+
grid-template-columns: 1fr;
|
| 1031 |
+
gap: 6px;
|
| 1032 |
+
}
|
| 1033 |
+
|
| 1034 |
+
.kpi-grid div {
|
| 1035 |
+
padding: 10px 12px;
|
| 1036 |
+
}
|
| 1037 |
+
|
| 1038 |
+
.kpi-grid span {
|
| 1039 |
+
font-size: 0.68rem;
|
| 1040 |
+
margin-bottom: 4px;
|
| 1041 |
+
}
|
| 1042 |
+
|
| 1043 |
+
.kpi-grid strong {
|
| 1044 |
+
font-size: 0.88rem;
|
| 1045 |
+
}
|
| 1046 |
+
|
| 1047 |
+
.med-card {
|
| 1048 |
+
padding: 12px 12px;
|
| 1049 |
+
}
|
| 1050 |
+
|
| 1051 |
+
.med-card-header strong {
|
| 1052 |
+
font-size: 0.84rem;
|
| 1053 |
+
}
|
| 1054 |
+
|
| 1055 |
+
.spotlight-tooltip {
|
| 1056 |
+
padding: 14px;
|
| 1057 |
+
bottom: 10px !important;
|
| 1058 |
+
}
|
| 1059 |
+
|
| 1060 |
+
.spotlight-tooltip-header h3 {
|
| 1061 |
+
font-size: 0.9rem;
|
| 1062 |
+
}
|
| 1063 |
+
|
| 1064 |
+
.spotlight-tooltip-body {
|
| 1065 |
+
font-size: 0.8rem;
|
| 1066 |
+
margin-bottom: 14px;
|
| 1067 |
+
line-height: 1.55;
|
| 1068 |
+
}
|
| 1069 |
+
|
| 1070 |
+
.panel h2 {
|
| 1071 |
+
font-size: 0.75rem;
|
| 1072 |
+
margin-bottom: 10px;
|
| 1073 |
+
}
|
| 1074 |
+
|
| 1075 |
+
.panel h3 {
|
| 1076 |
+
font-size: 0.72rem;
|
| 1077 |
+
}
|
| 1078 |
+
|
| 1079 |
+
.submit-btn {
|
| 1080 |
+
min-height: 48px;
|
| 1081 |
+
font-size: 0.88rem;
|
| 1082 |
+
}
|
| 1083 |
+
|
| 1084 |
+
.logs {
|
| 1085 |
+
max-height: 160px;
|
| 1086 |
+
font-size: 0.75rem;
|
| 1087 |
+
gap: 6px;
|
| 1088 |
+
}
|
| 1089 |
+
|
| 1090 |
+
.logs div {
|
| 1091 |
+
padding: 8px 10px;
|
| 1092 |
+
line-height: 1.4;
|
| 1093 |
+
}
|
| 1094 |
+
|
| 1095 |
+
.history-item {
|
| 1096 |
+
padding: 10px 12px;
|
| 1097 |
+
}
|
| 1098 |
+
|
| 1099 |
+
.history-item strong {
|
| 1100 |
+
font-size: 0.82rem;
|
| 1101 |
+
}
|
| 1102 |
+
|
| 1103 |
+
.history-detail {
|
| 1104 |
+
font-size: 0.78rem;
|
| 1105 |
+
}
|
| 1106 |
+
|
| 1107 |
+
.severity-tag, .intervention-tag {
|
| 1108 |
+
font-size: 0.65rem;
|
| 1109 |
+
padding: 2px 8px;
|
| 1110 |
+
}
|
| 1111 |
+
|
| 1112 |
+
button, select, input {
|
| 1113 |
+
font-size: 0.85rem;
|
| 1114 |
+
min-height: 46px;
|
| 1115 |
+
padding: 10px 12px;
|
| 1116 |
+
}
|
| 1117 |
+
|
| 1118 |
+
.action-row label,
|
| 1119 |
+
.field-group label {
|
| 1120 |
+
font-size: 0.72rem;
|
| 1121 |
+
}
|
| 1122 |
+
|
| 1123 |
+
.status-chip {
|
| 1124 |
+
font-size: 0.65rem;
|
| 1125 |
+
padding: 4px 10px;
|
| 1126 |
+
}
|
| 1127 |
+
|
| 1128 |
+
.guide-trigger {
|
| 1129 |
+
width: 30px !important;
|
| 1130 |
+
height: 30px;
|
| 1131 |
+
min-height: 30px;
|
| 1132 |
+
font-size: 0.85rem;
|
| 1133 |
+
}
|
| 1134 |
+
|
| 1135 |
+
.guide-counter {
|
| 1136 |
+
font-size: 0.68rem;
|
| 1137 |
+
padding: 3px 8px;
|
| 1138 |
+
}
|
| 1139 |
+
|
| 1140 |
+
.app-footer {
|
| 1141 |
+
margin-top: 16px;
|
| 1142 |
+
}
|
| 1143 |
+
|
| 1144 |
+
.app-footer p {
|
| 1145 |
+
font-size: 0.68rem;
|
| 1146 |
+
line-height: 1.4;
|
| 1147 |
+
}
|
| 1148 |
}
|
| 1149 |
|
| 1150 |
/* ── Scrollbar ───────────────────────────────────────────────── */
|
inference.py
CHANGED
|
@@ -17,12 +17,20 @@ import os
|
|
| 17 |
import re
|
| 18 |
from typing import Any, Dict, List
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
import requests
|
| 21 |
from openai import OpenAI
|
| 22 |
|
| 23 |
-
API_BASE_URL = os.getenv("API_BASE_URL", "https://
|
| 24 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "
|
| 25 |
-
HF_TOKEN = os.getenv("HF_TOKEN"
|
|
|
|
|
|
|
| 26 |
ENV_URL = os.getenv("POLYPHARMACY_ENV_URL", "http://localhost:7860").rstrip("/")
|
| 27 |
|
| 28 |
BENCHMARK = "polypharmacy_env"
|
|
@@ -31,14 +39,23 @@ MAX_STEPS = 16
|
|
| 31 |
TEMPERATURE = 0.0
|
| 32 |
MAX_TOKENS = 220
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
SYSTEM_PROMPT = (
|
| 35 |
-
"You are a clinical-pharmacist agent. "
|
| 36 |
-
"
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
'"intervention_type":"stop|dose_reduce|substitute|add_monitoring",'
|
| 40 |
-
'"
|
| 41 |
-
"
|
|
|
|
|
|
|
|
|
|
| 42 |
)
|
| 43 |
|
| 44 |
|
|
@@ -83,12 +100,43 @@ def _safe_json(text: str) -> Dict[str, Any]:
|
|
| 83 |
try:
|
| 84 |
data = json.loads(text)
|
| 85 |
if isinstance(data, dict):
|
| 86 |
-
return data
|
| 87 |
except Exception:
|
| 88 |
pass
|
| 89 |
return {"action_type": "finish_review"}
|
| 90 |
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def _llm_action(client: OpenAI, obs: Dict[str, Any]) -> Dict[str, Any]:
|
| 93 |
meds = obs.get("current_medications", [])
|
| 94 |
summary = {
|
|
@@ -129,6 +177,9 @@ def _reset(task_id: str) -> Dict[str, Any]:
|
|
| 129 |
|
| 130 |
def _step(action: Dict[str, Any]) -> Dict[str, Any]:
|
| 131 |
r = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=45)
|
|
|
|
|
|
|
|
|
|
| 132 |
r.raise_for_status()
|
| 133 |
return r.json()
|
| 134 |
|
|
@@ -168,17 +219,16 @@ def run_task(client: OpenAI, task_id: str) -> None:
|
|
| 168 |
success = score > 0.0
|
| 169 |
break
|
| 170 |
except Exception:
|
| 171 |
-
# Still emit END to keep evaluator parser stable.
|
| 172 |
success = False
|
| 173 |
finally:
|
| 174 |
log_end(success=success, steps=steps, score=score, rewards=rewards)
|
| 175 |
|
| 176 |
|
| 177 |
def main() -> int:
|
| 178 |
-
if not
|
| 179 |
-
print("HF_TOKEN is required", flush=True)
|
| 180 |
return 1
|
| 181 |
-
client = OpenAI(base_url=API_BASE_URL, api_key=
|
| 182 |
for task in TASKS:
|
| 183 |
run_task(client, task)
|
| 184 |
return 0
|
|
|
|
| 17 |
import re
|
| 18 |
from typing import Any, Dict, List
|
| 19 |
|
| 20 |
+
try:
|
| 21 |
+
from dotenv import load_dotenv
|
| 22 |
+
load_dotenv()
|
| 23 |
+
except ImportError:
|
| 24 |
+
pass
|
| 25 |
+
|
| 26 |
import requests
|
| 27 |
from openai import OpenAI
|
| 28 |
|
| 29 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.groq.com/openai/v1")
|
| 30 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "llama-3.3-70b-versatile")
|
| 31 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 32 |
+
# Also accept GROQ_API_KEY or API_KEY as fallback for the token
|
| 33 |
+
_API_KEY = HF_TOKEN or os.getenv("GROQ_API_KEY") or os.getenv("API_KEY")
|
| 34 |
ENV_URL = os.getenv("POLYPHARMACY_ENV_URL", "http://localhost:7860").rstrip("/")
|
| 35 |
|
| 36 |
BENCHMARK = "polypharmacy_env"
|
|
|
|
| 39 |
TEMPERATURE = 0.0
|
| 40 |
MAX_TOKENS = 220
|
| 41 |
|
| 42 |
+
VALID_ACTION_TYPES = {"query_ddi", "propose_intervention", "finish_review"}
|
| 43 |
+
VALID_INTERVENTIONS = {"stop", "dose_reduce", "substitute", "add_monitoring"}
|
| 44 |
+
|
| 45 |
SYSTEM_PROMPT = (
|
| 46 |
+
"You are a clinical-pharmacist agent reviewing an elderly patient's medications. "
|
| 47 |
+
"You MUST return ONLY a single JSON object (no markdown, no explanation). "
|
| 48 |
+
"The action_type MUST be exactly one of: query_ddi, propose_intervention, finish_review. "
|
| 49 |
+
"Schema for query_ddi: "
|
| 50 |
+
'{"action_type":"query_ddi","drug_id_1":"DRUG_X","drug_id_2":"DRUG_Y"} '
|
| 51 |
+
"Schema for propose_intervention: "
|
| 52 |
+
'{"action_type":"propose_intervention","target_drug_id":"DRUG_X",'
|
| 53 |
'"intervention_type":"stop|dose_reduce|substitute|add_monitoring",'
|
| 54 |
+
'"rationale":"reason"} '
|
| 55 |
+
"Schema for finish_review: "
|
| 56 |
+
'{"action_type":"finish_review"} '
|
| 57 |
+
"Strategy: First query_ddi for high-risk drug pairs (especially those with beers_flags). "
|
| 58 |
+
"Then propose_intervention for dangerous findings. Finally finish_review."
|
| 59 |
)
|
| 60 |
|
| 61 |
|
|
|
|
| 100 |
try:
|
| 101 |
data = json.loads(text)
|
| 102 |
if isinstance(data, dict):
|
| 103 |
+
return _sanitize_action(data)
|
| 104 |
except Exception:
|
| 105 |
pass
|
| 106 |
return {"action_type": "finish_review"}
|
| 107 |
|
| 108 |
|
| 109 |
+
def _sanitize_action(raw: Dict[str, Any]) -> Dict[str, Any]:
|
| 110 |
+
"""Build a clean action dict with only the fields relevant to the action type."""
|
| 111 |
+
atype = raw.get("action_type", "")
|
| 112 |
+
if atype not in VALID_ACTION_TYPES:
|
| 113 |
+
return {"action_type": "finish_review"}
|
| 114 |
+
|
| 115 |
+
if atype == "query_ddi":
|
| 116 |
+
return {
|
| 117 |
+
"action_type": "query_ddi",
|
| 118 |
+
"drug_id_1": raw.get("drug_id_1") or None,
|
| 119 |
+
"drug_id_2": raw.get("drug_id_2") or None,
|
| 120 |
+
}
|
| 121 |
+
if atype == "propose_intervention":
|
| 122 |
+
it = raw.get("intervention_type", "")
|
| 123 |
+
if it not in VALID_INTERVENTIONS:
|
| 124 |
+
it = "add_monitoring"
|
| 125 |
+
result: Dict[str, Any] = {
|
| 126 |
+
"action_type": "propose_intervention",
|
| 127 |
+
"target_drug_id": raw.get("target_drug_id") or None,
|
| 128 |
+
"intervention_type": it,
|
| 129 |
+
}
|
| 130 |
+
new_drug = raw.get("proposed_new_drug_id") or None
|
| 131 |
+
if new_drug:
|
| 132 |
+
result["proposed_new_drug_id"] = new_drug
|
| 133 |
+
rationale = raw.get("rationale") or None
|
| 134 |
+
if rationale:
|
| 135 |
+
result["rationale"] = rationale
|
| 136 |
+
return result
|
| 137 |
+
return {"action_type": "finish_review"}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
def _llm_action(client: OpenAI, obs: Dict[str, Any]) -> Dict[str, Any]:
|
| 141 |
meds = obs.get("current_medications", [])
|
| 142 |
summary = {
|
|
|
|
| 177 |
|
| 178 |
def _step(action: Dict[str, Any]) -> Dict[str, Any]:
|
| 179 |
r = requests.post(f"{ENV_URL}/step", json={"action": action}, timeout=45)
|
| 180 |
+
if r.status_code == 422:
|
| 181 |
+
# Invalid action — return a penalty and let the agent continue
|
| 182 |
+
return {"observation": {}, "reward": -0.1, "done": False, "info": {"error": r.text[:200]}}
|
| 183 |
r.raise_for_status()
|
| 184 |
return r.json()
|
| 185 |
|
|
|
|
| 219 |
success = score > 0.0
|
| 220 |
break
|
| 221 |
except Exception:
|
|
|
|
| 222 |
success = False
|
| 223 |
finally:
|
| 224 |
log_end(success=success, steps=steps, score=score, rewards=rewards)
|
| 225 |
|
| 226 |
|
| 227 |
def main() -> int:
|
| 228 |
+
if not _API_KEY:
|
| 229 |
+
print("HF_TOKEN (or GROQ_API_KEY / API_KEY) is required", flush=True)
|
| 230 |
return 1
|
| 231 |
+
client = OpenAI(base_url=API_BASE_URL, api_key=_API_KEY)
|
| 232 |
for task in TASKS:
|
| 233 |
run_task(client, task)
|
| 234 |
return 0
|