gpcervellera committed on
Commit
20d0c48
·
verified ·
1 Parent(s): 05fa0b8

Update algoritm.py

Browse files
Files changed (1) hide show
  1. algoritm.py +117 -92
algoritm.py CHANGED
@@ -1,93 +1,118 @@
1
-
2
- import os
3
- import statsmodels.api as sm
4
- from sklearn.linear_model import LinearRegression
5
- import pandas as pd
6
- import numpy as np
7
- import os
8
- import shutil
9
-
10
-
11
- def algoritmo(json_data):
12
- d = json_data
13
- df_s = pd.DataFrame({
14
- "deviceId": d["deviceId"],
15
- "TELEMETRY_SEQ_GROUP_ID": d["TELEMETRY_SEQ_GROUP_ID"],
16
- **{k: d[k] for k in d if isinstance(d[k], list)}
17
- })
18
- cols = [
19
- 'OAT', 'IAS', 'ALPHAI', 'BETAI', 'TETA', 'PHI', 'HDG', 'P', 'Q',
20
- 'R', 'NR', 'NP1', 'NG1', 'VZ', 'ZRS',
21
- 'E1A_TM_TOT', 'E1A_TM_T1', 'NGR1', 'DP1'
22
- ]
23
- X = df_s[cols]
24
- assert list(X.columns) == cols
25
-
26
- X = df_s[cols]
27
- X = sm.add_constant(X)
28
- y_pred = res_loaded.predict(X)
29
- y_pred_100 = y_pred * 100
30
- df_s['y_pred_100']=y_pred_100
31
- #print(len(y_pred_100))
32
- df_s["clogging_LH_Predicted"] = np.take(values, np.searchsorted(bins, df_s["y_pred_100"], side="right"))
33
- mean_clogging = df_s["clogging_LH_Predicted"].mean()
34
- #creo la cartella se non esiste
35
- device_id = d["deviceId"]
36
- csv_path = os.path.join(device_id, "clogging_mean_by_group.csv")
37
- group_id = d["TELEMETRY_SEQ_GROUP_ID"]
38
- if group_id == 0:
39
- if os.path.exists(device_id):
40
- shutil.rmtree(device_id)
41
- os.makedirs(device_id)
42
- #dataframe da scrivere
43
- row = pd.DataFrame([{
44
- "TELEMETRY_SEQ_GROUP_ID": group_id,
45
- "mean_clogging_LH_Predicted": mean_clogging
46
- }])
47
- row.to_csv(
48
- csv_path,
49
- mode="a",
50
- header=not os.path.exists(csv_path),
51
- index=False
52
- )
53
- #regressione
54
- dtf=pd.read_csv(csv_path)
55
- #print(dtf)
56
- l=len(dtf)
57
- if l>5:
58
- X=dtf[['TELEMETRY_SEQ_GROUP_ID']]
59
- y=dtf['mean_clogging_LH_Predicted']
60
- model = LinearRegression()
61
- model.fit(X, y)
62
-
63
- a = model.coef_[0]
64
- b = model.intercept_
65
-
66
- x_at_100 = ((100 - b) / a) if a != 0 else np.nan
67
- if a != 0:
68
- x_at_100 =x_at_100 - int(group_id)
69
- slope= a
70
- intercept= b
71
- r2= model.score(X, y)
72
- n_points= l
73
- else:
74
- x_at_100 =np.nan
75
- slope=np.nan
76
- intercept= np.nan
77
- r2= np.nan
78
- n_points= l
79
-
80
- else:
81
- x_at_100 =np.nan
82
- slope=np.nan
83
- intercept= np.nan
84
- r2= np.nan
85
- n_points= l
86
-
87
- pic={}
88
- pic['x_at_100']=x_at_100
89
- pic['slope']=slope
90
- pic['intercept']=intercept
91
- pic['r2']=r2
92
- pic['n_points']=l
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return pic
 
1
+
2
+ import os
3
+ import statsmodels.api as sm
4
+ from sklearn.linear_model import LinearRegression
5
+ import pandas as pd
6
+ import numpy as np
7
+ import os
8
+ import shutil
9
+
10
+ res_loaded = sm.load("beta_model_clogging.pkl")
11
+ #Prendiamo tutti i limiti superiori, tranne l’ultimo che va a +∞.
12
+ bins = [
13
+ 6.827491567, 7.207134976, 7.507514601, 7.85202832, 8.135308095,
14
+ 8.689867231, 9.286871296, 9.617912395, 10.09183855, 10.73296965,
15
+ 11.10692936, 11.69554997, 12.28992645, 12.73162955, 13.58962501,
16
+ 14.05211378, 14.82733437, 15.28453334, 15.97759006, 16.82037951,
17
+ 17.64491222, 18.42719281, 19.35467075, 20.11238201, 21.11239952,
18
+ 21.8749792, 23.13365381, 24.06277248, 24.69985079, 26.06401012,
19
+ 26.99098886, 28.06155461, 29.31860042, 30.62386734, 31.4413642,
20
+ 32.92566161, 33.97571072, 35.31251088, 36.15053542, 37.57770049,
21
+ 39.13652821, 40.29113449, 41.76522545, 42.82082043, 44.00697262,
22
+ 45.92880868, 46.82432879, 48.63647026, 49.74936819, 51.22777855,
23
+ 52.49036854, 53.51554916, 55.2477514, 56.28299801, 57.51382218,
24
+ 59.16053699, 60.15795894, 61.6664039, 62.90702101, 63.87127815,
25
+ 64.98319564, 66.22208356, 66.61521227, 68.15750081, 69.44821662,
26
+ 70.66864959, 71.92308181, 72.69082311, 73.92476305, 74.87935242,
27
+ 75.77031936, 76.94368679, 78.04134047, 79.02976362, 79.56238123,
28
+ 80.1200946, 81.16954675, 82.1616298, 82.70600553, 83.32428364,
29
+ 84.40737139, 85.17567459, 85.61323671, 86.27587966, 87.00108867,
30
+ 87.48195456, 87.87995156, 88.53346862, 88.95068965, 89.6043336,
31
+ 90.05141954, 90.51239833, 90.99974806, 91.33113872, 91.79738573,
32
+ 92.19314603, 92.59169911, 92.84177084, 93.23587096, np.inf
33
+ ]
34
+ values = [0] + list(range(2, 101))
35
+
36
+ def algoritmo(json_data):
37
+ d = json_data
38
+ df_s = pd.DataFrame({
39
+ "deviceId": d["deviceId"],
40
+ "TELEMETRY_SEQ_GROUP_ID": d["TELEMETRY_SEQ_GROUP_ID"],
41
+ **{k: d[k] for k in d if isinstance(d[k], list)}
42
+ })
43
+ cols = [
44
+ 'OAT', 'IAS', 'ALPHAI', 'BETAI', 'TETA', 'PHI', 'HDG', 'P', 'Q',
45
+ 'R', 'NR', 'NP1', 'NG1', 'VZ', 'ZRS',
46
+ 'E1A_TM_TOT', 'E1A_TM_T1', 'NGR1', 'DP1'
47
+ ]
48
+ X = df_s[cols]
49
+ assert list(X.columns) == cols
50
+
51
+ X = df_s[cols]
52
+ X = sm.add_constant(X)
53
+ y_pred = res_loaded.predict(X)
54
+ y_pred_100 = y_pred * 100
55
+ df_s['y_pred_100']=y_pred_100
56
+ #print(len(y_pred_100))
57
+ df_s["clogging_LH_Predicted"] = np.take(values, np.searchsorted(bins, df_s["y_pred_100"], side="right"))
58
+ mean_clogging = df_s["clogging_LH_Predicted"].mean()
59
+ #creo la cartella se non esiste
60
+ device_id = d["deviceId"]
61
+ csv_path = os.path.join(device_id, "clogging_mean_by_group.csv")
62
+ group_id = d["TELEMETRY_SEQ_GROUP_ID"]
63
+ if group_id == 0:
64
+ if os.path.exists(device_id):
65
+ shutil.rmtree(device_id)
66
+ os.makedirs(device_id)
67
+ #dataframe da scrivere
68
+ row = pd.DataFrame([{
69
+ "TELEMETRY_SEQ_GROUP_ID": group_id,
70
+ "mean_clogging_LH_Predicted": mean_clogging
71
+ }])
72
+ row.to_csv(
73
+ csv_path,
74
+ mode="a",
75
+ header=not os.path.exists(csv_path),
76
+ index=False
77
+ )
78
+ #regressione
79
+ dtf=pd.read_csv(csv_path)
80
+ #print(dtf)
81
+ l=len(dtf)
82
+ if l>5:
83
+ X=dtf[['TELEMETRY_SEQ_GROUP_ID']]
84
+ y=dtf['mean_clogging_LH_Predicted']
85
+ model = LinearRegression()
86
+ model.fit(X, y)
87
+
88
+ a = model.coef_[0]
89
+ b = model.intercept_
90
+
91
+ x_at_100 = ((100 - b) / a) if a != 0 else np.nan
92
+ if a != 0:
93
+ x_at_100 =x_at_100 - int(group_id)
94
+ slope= a
95
+ intercept= b
96
+ r2= model.score(X, y)
97
+ n_points= l
98
+ else:
99
+ x_at_100 =np.nan
100
+ slope=np.nan
101
+ intercept= np.nan
102
+ r2= np.nan
103
+ n_points= l
104
+
105
+ else:
106
+ x_at_100 =np.nan
107
+ slope=np.nan
108
+ intercept= np.nan
109
+ r2= np.nan
110
+ n_points= l
111
+
112
+ pic={}
113
+ pic['x_at_100']=x_at_100
114
+ pic['slope']=slope
115
+ pic['intercept']=intercept
116
+ pic['r2']=r2
117
+ pic['n_points']=l
118
  return pic