WLenhard commited on
Commit
d711953
·
verified ·
1 Parent(s): 45cb6e8

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. README.md +39 -12
  3. app.R +261 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM rocker/r-ver:4.3.2
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ libcurl4-openssl-dev \
6
+ libssl-dev \
7
+ libxml2-dev \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Install R packages
11
+ RUN R -e "install.packages('plumber', repos='https://cloud.r-project.org/')"
12
+
13
+ # Set working directory
14
+ WORKDIR /app
15
+
16
+ # Copy application files
17
+ COPY app.R /app/
18
+
19
+ # Expose port (Hugging Face uses 7860)
20
+ EXPOSE 7860
21
+
22
+ # Start the API
23
+ CMD ["R", "-e", "pr <- plumber::plumb('app.R'); pr$run(host='0.0.0.0', port=7860)"]
README.md CHANGED
@@ -1,12 +1,39 @@
1
- ---
2
- title: D.quantile
3
- emoji: 🔥
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- short_description: Distribution-free effect size
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Effect Size Calculator
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # Distribution-Free Effect Size Calculator
12
+
13
+ This API calculates distribution-free effect sizes using quantile function modeling.
14
+
15
+ ## Usage
16
+
17
+ Send GET or POST request to `/calculate` with:
18
+ - `group1`: comma-separated numeric values
19
+ - `group2`: comma-separated numeric values
20
+ - `degree`: polynomial degree (optional, default=5)
21
+
22
+ Example:
23
+ ```
24
+ /calculate?group1=1,2,3,4,5&group2=6,7,8,9,10
25
+ ```
26
+
27
+ ## API Endpoints
28
+
29
+ - `GET /` - Health check
30
+ - `GET /calculate` - Calculate effect size
31
+ - `POST /calculate` - Calculate effect size
32
+
33
+ ## Author
34
+
35
+ Wolfgang Lenhard and Alexandra Lenhard
36
+
37
+ ## Citation
38
+
39
+ Lenhard, W. (submitted). Distribution-Free Effect Size Estimation via Quantile Function Modeling.
app.R ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(plumber)
2
+
3
+ #* @apiTitle Effect Size Calculator API
4
+
5
+ # Your functions here
6
+ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
7
+
8
+ # Input validation
9
+ if (!is.numeric(x1) || !is.numeric(x2)) {
10
+ stop("Both x1 and x2 must be numeric vectors.")
11
+ }
12
+
13
+ n1 <- length(x1)
14
+ n2 <- length(x2)
15
+
16
+ if (n1 < degree + 1) {
17
+ stop("Group 1 has insufficient data: need at least ", degree + 1,
18
+ " observations for degree ", degree, " polynomial (got ", n1, ").")
19
+ }
20
+
21
+ if (n2 < degree + 1) {
22
+ stop("Group 2 has insufficient data: need at least ", degree + 1,
23
+ " observations for degree ", degree, " polynomial (got ", n2, ").")
24
+ }
25
+
26
+ if (any(is.na(x1)) || any(is.na(x2))) {
27
+ warning("Missing values detected and will be removed.")
28
+ x1 <- x1[!is.na(x1)]
29
+ x2 <- x2[!is.na(x2)]
30
+ n1 <- length(x1)
31
+ n2 <- length(x2)
32
+ }
33
+
34
+ if (n1 == 0 || n2 == 0) {
35
+ stop("Cannot compute effect size with empty groups after removing NAs.")
36
+ }
37
+
38
+ model1 <- fit_quantile_function(x1, degree)
39
+ model2 <- fit_quantile_function(x2, degree)
40
+
41
+ tie1 <- attr(model1, "tie_proportion")
42
+ tie2 <- attr(model2, "tie_proportion")
43
+
44
+ if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
45
+ message(sprintf(
46
+ "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
47
+ tie1 * 100, tie2 * 100
48
+ ))
49
+ }
50
+
51
+ moments1 <- get_moments(model1, group_label = "Group 1")
52
+ moments2 <- get_moments(model2, group_label = "Group 2")
53
+
54
+ weighted_pooled_variance <- ((n1 -1) * moments1$variance + (n2 - 1) * moments2$variance) / (n1 + n2 - 2)
55
+ pooled_sd <- sqrt(weighted_pooled_variance)
56
+
57
+ mean_diff <- moments2$mean - moments1$mean
58
+
59
+ if (pooled_sd == 0) {
60
+ if (mean_diff == 0) {
61
+ d_q <- 0
62
+ } else {
63
+ d_q <- sign(mean_diff) * Inf
64
+ warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
65
+ }
66
+ } else {
67
+ d_q <- mean_diff / pooled_sd
68
+ }
69
+
70
+ result <- list(
71
+ d_q = d_q,
72
+ group1_mean = moments1$mean,
73
+ group1_variance = moments1$variance,
74
+ group1_sd = sqrt(moments1$variance),
75
+ group2_mean = moments2$mean,
76
+ group2_variance = moments2$variance,
77
+ group2_sd = sqrt(moments2$variance),
78
+ pooled_sd = pooled_sd,
79
+ n1 = n1,
80
+ n2 = n2,
81
+ degree = degree
82
+ )
83
+
84
+ result$tie_proportion_1 <- tie1
85
+ result$tie_proportion_2 <- tie2
86
+
87
+ return(result)
88
+ }
89
+
90
+ fit_quantile_function <- function(x, poly_degree,
91
+ check_monotonicity = FALSE,
92
+ min_degree = 2) {
93
+
94
+ n <- length(x)
95
+
96
+ if (n < 3) {
97
+ stop("Need at least 3 observations to fit a polynomial quantile function.")
98
+ }
99
+
100
+ n_unique <- length(unique(x))
101
+ tie_proportion <- 1 - (n_unique / n)
102
+
103
+ max_possible_degree <- n_unique - 1
104
+
105
+ if (poly_degree > max_possible_degree) {
106
+ poly_degree <- max_possible_degree
107
+ }
108
+
109
+ if (tie_proportion > 0.3 && poly_degree > 3) {
110
+ recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
111
+ if (recommended_degree < poly_degree) {
112
+ poly_degree <- recommended_degree
113
+ }
114
+ }
115
+
116
+ if (poly_degree < min_degree) {
117
+ stop(sprintf(
118
+ "Insufficient unique values (%d) to fit minimum polynomial degree (%d).",
119
+ n_unique, min_degree
120
+ ))
121
+ }
122
+
123
+ avg_ranks <- rank(x, ties.method = "average")
124
+ p <- (avg_ranks - 0.5) / n
125
+ z <- qnorm(p)
126
+
127
+ current_degree <- poly_degree
128
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
129
+
130
+ attr(model, "sample_size") <- n
131
+ attr(model, "n_unique") <- n_unique
132
+ attr(model, "tie_proportion") <- tie_proportion
133
+ attr(model, "poly_degree") <- current_degree
134
+
135
+ return(model)
136
+ }
137
+
138
+ get_moments <- function(model, group_label = "Unknown") {
139
+ coeffs <- coef(model)
140
+ poly_degree <- length(coeffs) - 1
141
+
142
+ f <- function(z) {
143
+ val <- coeffs[poly_degree + 1]
144
+ for (i in poly_degree:1) {
145
+ val <- val * z + coeffs[i]
146
+ }
147
+ return(val)
148
+ }
149
+
150
+ mean_integrand <- function(z) {
151
+ f(z) * dnorm(z)
152
+ }
153
+
154
+ mean_result <- integrate(
155
+ mean_integrand,
156
+ lower = -Inf,
157
+ upper = Inf,
158
+ subdivisions = 2000L,
159
+ rel.tol = 1e-8,
160
+ abs.tol = 1e-10,
161
+ stop.on.error = FALSE
162
+ )
163
+
164
+ mu <- mean_result$value
165
+
166
+ variance_integrand <- function(z) {
167
+ deviation <- f(z) - mu
168
+ deviation^2 * dnorm(z)
169
+ }
170
+
171
+ variance_result <- integrate(
172
+ variance_integrand,
173
+ lower = -Inf,
174
+ upper = Inf,
175
+ subdivisions = 2000L,
176
+ rel.tol = 1e-8,
177
+ abs.tol = 1e-10,
178
+ stop.on.error = FALSE
179
+ )
180
+
181
+ var <- variance_result$value
182
+
183
+ if (var < 0 && abs(var) < 1e-10) {
184
+ var <- 0
185
+ }
186
+
187
+ return(list(
188
+ mean = mu,
189
+ variance = var
190
+ ))
191
+ }
192
+
193
+ # API endpoint
194
+ #* Calculate effect size from two groups
195
+ #* @param group1 Comma-separated numeric values for group 1
196
+ #* @param group2 Comma-separated numeric values for group 2
197
+ #* @param degree Polynomial degree (default 5)
198
+ #* @post /calculate
199
+ #* @get /calculate
200
+ function(group1, group2, degree = 5) {
201
+
202
+ tryCatch({
203
+ # Parse input
204
+ x1 <- as.numeric(unlist(strsplit(group1, ",")))
205
+ x2 <- as.numeric(unlist(strsplit(group2, ",")))
206
+ deg <- as.numeric(degree)
207
+
208
+ # Remove any NA values from parsing
209
+ x1 <- x1[!is.na(x1)]
210
+ x2 <- x2[!is.na(x2)]
211
+
212
+ if (length(x1) == 0 || length(x2) == 0) {
213
+ return(list(
214
+ error = "Invalid input: Could not parse numeric values from input strings"
215
+ ))
216
+ }
217
+
218
+ # Calculate effect size
219
+ result <- d.quantile(x1, x2, degree = deg, silent = TRUE)
220
+
221
+ # Return clean result
222
+ return(list(
223
+ success = TRUE,
224
+ d_q = result$d_q,
225
+ group1 = list(
226
+ n = result$n1,
227
+ mean = result$group1_mean,
228
+ sd = result$group1_sd
229
+ ),
230
+ group2 = list(
231
+ n = result$n2,
232
+ mean = result$group2_mean,
233
+ sd = result$group2_sd
234
+ ),
235
+ pooled_sd = result$pooled_sd,
236
+ degree = result$degree,
237
+ tie_info = list(
238
+ group1_ties = result$tie_proportion_1,
239
+ group2_ties = result$tie_proportion_2
240
+ )
241
+ ))
242
+
243
+ }, error = function(e) {
244
+ return(list(
245
+ success = FALSE,
246
+ error = as.character(e$message)
247
+ ))
248
+ })
249
+ }
250
+
251
+ #* Health check endpoint
252
+ #* @get /
253
+ function() {
254
+ return(list(
255
+ status = "running",
256
+ message = "Effect Size Calculator API is ready",
257
+ endpoints = list(
258
+ calculate = "/calculate?group1=1,2,3&group2=4,5,6"
259
+ )
260
+ ))
261
+ }