I am not using MIMIC-III or eICU data; since this pipeline should be applicable to other EHR data sets, I am using it on in-house EHR data. No matter how I preprocess the ICD codes (e.g. `ICD9:V50.2` vs. `V50.2` vs. `V502`), I always encounter the error below:
--------------------------------------------------------------------------------
2-B) Transform time-dependent data
--------------------------------------------------------------------------------
Total variables : 31734
Traceback (most recent call last):
File "D:\bo\envs\bd\lib\site-packages\pandas\core\indexes\base.py", line 3361, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'icd_code:0'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\bo\envs\bd\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "D:\bo\envs\bd\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "D:\bo\EOBD_prediction\FIDDLE\run.py", line 141, in <module>
main()
File "D:\bo\EOBD_prediction\FIDDLE\run.py", line 138, in main
X, X_feature_names, X_feature_aliases = FIDDLE_steps.process_time_dependent(df_time_series, args)
File "D:\bo\EOBD_prediction\FIDDLE\steps.py", line 235, in process_time_dependent
df_time_series, dtypes_time_series = transform_time_series_table(df_data_time_series, args)
File "D:\bo\EOBD_prediction\FIDDLE\steps.py", line 430, in transform_time_series_table
variables_num_freq = get_frequent_numeric_variables(df_in, variables, theta_freq, args)
File "D:\bo\EOBD_prediction\FIDDLE\helpers.py", line 93, in get_frequent_numeric_variables
numeric_vars = [col for col in variables if df_types[col] == 'Numeric']
File "D:\bo\EOBD_prediction\FIDDLE\helpers.py", line 93, in <listcomp>
numeric_vars = [col for col in variables if df_types[col] == 'Numeric']
File "D:\bo\envs\bd\lib\site-packages\pandas\core\series.py", line 942, in __getitem__
return self._get_value(key)
File "D:\bo\envs\bd\lib\site-packages\pandas\core\series.py", line 1051, in _get_value
loc = self.index.get_loc(label)
File "D:\bo\envs\bd\lib\site-packages\pandas\core\indexes\base.py", line 3363, in get_loc
raise KeyError(key) from err
KeyError: 'icd_code:0'