-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2-required-main_refine_all.py
More file actions
88 lines (77 loc) · 2.71 KB
/
2-required-main_refine_all.py
File metadata and controls
88 lines (77 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from handle_data import refine
from handle_data.utils import utils
import os
__author__ = "Felipe Ukan Pereira"
@utils.timeit
def main():
    """
    Assemble the refinement settings for every supported dataset and run
    the refinement that is currently enabled (the Kuwait dataset).

    The US EPA and US Brian settings are still built here so the matching
    call at the bottom of the function can be switched on by uncommenting it.
    """
    datasets_root = 'datasets'

    # ********* Arguments/options for refining US EPA data
    usepa_cleaned_dir = os.path.join(datasets_root, 'cleaned_datasets')
    usepa_refined_dir = os.path.join(datasets_root, 'refined_datasets')
    usepa_code_names = os.path.join(datasets_root, 'AQS_code_list', 'code_to_name.json')

    # Per-parameter options, keyed by the usepa parameter code.
    # An "o" in an option name stands for optional.
    usepa_param_options = {
        # WS Sca
        '61101': {},
        # WD Sca -- the upper clip accepts only a percentile
        '61102': {'R_o_upper_clip': {'percentile': 100}},
        # WS Res
        '61103': {},
        # WD Res -- the upper clip accepts only a percentile
        '61104': {'R_o_upper_clip': {'percentile': 100}},
    }

    usepa_params = {
        # FIPS code for state and counties
        'R_country_code': '840',
        'R_state_code': '06',
        "R_prefix": usepa_refined_dir,
        'R_input_dataset_path': usepa_cleaned_dir,
        'R_code_to_name_file': usepa_code_names,
        # 'R_year_range': [x for x in range(1997, 2018)],
        'R_frequency': '1H',
        'R_max_missing_percentage': 20,
        'R_param_options': usepa_param_options,
    }

    # ********* Args for refining Kuwait data
    # Disabled optional keys: 'R_code_to_name_file', 'R_frequency',
    # 'R_param_options'.
    kuwait_root = os.path.join(datasets_root, 'Kuwait', 'Active', 'localformat')
    kuwait_params = {
        'R_output_dataset_path': os.path.join(kuwait_root, 'refined'),
        'R_input_dataset_path': os.path.join(kuwait_root, 'cleaned'),
        'R_max_missing_percentage': 20,
        'R_min_nonzeros_in_day': 12,
    }

    # ********* Args for refining US BRIAN data
    # Disabled optional keys: 'R_code_to_name_file', 'R_frequency',
    # 'R_param_options'.
    usbrian_root = os.path.join(datasets_root, 'US_Brian', 'original', 'localformat')
    usbrian_params = {
        'R_output_dataset_path': os.path.join(usbrian_root, 'refined'),
        'R_input_dataset_path': os.path.join(usbrian_root, 'cleaned'),
        'R_max_missing_percentage': 20,
        'R_min_nonzeros_in_day': 12,
    }

    # Exactly one refinement is active; uncomment another call to switch.
    # refine.refine_usepa_dataset(usepa_params)
    refine.refine_kuwait_dataset(kuwait_params)
    # refine.refine_usbrian_dataset(usbrian_params)
# Run the refinement only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()