# DRL models from Stable Baselines 3
from __future__ import annotations
import time
import numpy as np
import pandas as pd
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv
from finrl import config
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.preprocessors import data_split
MODELS = {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO}
MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}
NOISE = {
"normal": NormalActionNoise,
"ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
}
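# Note: the NOISE map is consumed by the get_model() helpers below. When a model's
# kwargs contain an "action_noise" entry (e.g. {"action_noise": "ornstein_uhlenbeck"}),
# the string is replaced with an instantiated noise object sized to the action space.
# In Stable Baselines 3, action noise applies to the off-policy algorithms (DDPG, TD3, SAC).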
class TensorboardCallback(BaseCallback):
"""
    Custom callback for logging additional values to TensorBoard.
"""
def __init__(self, verbose=0):
super().__init__(verbose)
def _on_step(self) -> bool:
try:
self.logger.record(key="train/reward", value=self.locals["rewards"][0])
except BaseException:
self.logger.record(key="train/reward", value=self.locals["reward"][0])
return True
class DRLAgent:
"""Provides implementations for DRL algorithms
Attributes
----------
env: gym environment class
user-defined class
Methods
-------
    get_model()
        set up a DRL algorithm
    train_model()
        train a DRL algorithm on a training dataset
        and return the trained model
    DRL_prediction()
        make a prediction on a test dataset and return the results
"""
def __init__(self, env):
self.env = env
def get_model(
self,
model_name,
policy="MlpPolicy",
policy_kwargs=None,
model_kwargs=None,
verbose=1,
seed=None,
tensorboard_log=None,
):
if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
if model_kwargs is None:
model_kwargs = MODEL_KWARGS[model_name]
if "action_noise" in model_kwargs:
n_actions = self.env.action_space.shape[-1]
model_kwargs["action_noise"] = NOISE[model_kwargs["action_noise"]](
mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
)
print(model_kwargs)
return MODELS[model_name](
policy=policy,
env=self.env,
tensorboard_log=tensorboard_log,
verbose=verbose,
policy_kwargs=policy_kwargs,
seed=seed,
**model_kwargs,
)
def train_model(self, model, tb_log_name, total_timesteps=5000):
model = model.learn(
total_timesteps=total_timesteps,
tb_log_name=tb_log_name,
callback=TensorboardCallback(),
)
return model
@staticmethod
def DRL_prediction(model, environment, deterministic=True):
        """make a prediction"""
        test_env, test_obs = environment.get_sb_env()
account_memory = []
actions_memory = []
# state_memory=[] #add memory pool to store states
test_env.reset()
for i in range(len(environment.df.index.unique())):
action, _states = model.predict(test_obs, deterministic=deterministic)
# account_memory = test_env.env_method(method_name="save_asset_memory")
# actions_memory = test_env.env_method(method_name="save_action_memory")
test_obs, rewards, dones, info = test_env.step(action)
if i == (len(environment.df.index.unique()) - 2):
account_memory = test_env.env_method(method_name="save_asset_memory")
actions_memory = test_env.env_method(method_name="save_action_memory")
# state_memory=test_env.env_method(method_name="save_state_memory") # add current state to state memory
if dones[0]:
print("hit end!")
break
return account_memory[0], actions_memory[0]
@staticmethod
def DRL_prediction_load_from_file(model_name, environment, cwd, deterministic=True):
if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
try:
# load agent
model = MODELS[model_name].load(cwd)
print("Successfully load model", cwd)
except BaseException:
raise ValueError("Fail to load agent!")
# test on the testing env
state = environment.reset()
episode_returns = [] # the cumulative_return / initial_account
episode_total_assets = [environment.initial_total_asset]
done = False
while not done:
action = model.predict(state, deterministic=deterministic)[0]
state, reward, done, _ = environment.step(action)
total_asset = (
environment.amount
+ (environment.price_ary[environment.day] * environment.stocks).sum()
)
episode_total_assets.append(total_asset)
episode_return = total_asset / environment.initial_total_asset
episode_returns.append(episode_return)
print("episode_return", episode_return)
print("Test Finished!")
return episode_total_assets
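    # Illustrative call for a saved agent (a sketch; the checkpoint path and environment are
    # placeholders, and the environment is expected to expose the attributes used above:
    # initial_total_asset, price_ary, day, amount, stocks):
    #
    #     episode_total_assets = DRLAgent.DRL_prediction_load_from_file(
    #         model_name="ppo", environment=trade_env, cwd="trained_models/agent_ppo.zip"
    #     )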
class DRLEnsembleAgent:
@staticmethod
def get_model(
model_name,
env,
policy="MlpPolicy",
policy_kwargs=None,
model_kwargs=None,
seed=None,
verbose=1,
):
if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported.")
if model_kwargs is None:
temp_model_kwargs = MODEL_KWARGS[model_name]
else:
temp_model_kwargs = model_kwargs.copy()
if "action_noise" in temp_model_kwargs:
n_actions = env.action_space.shape[-1]
temp_model_kwargs["action_noise"] = NOISE[
temp_model_kwargs["action_noise"]
](mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
print(temp_model_kwargs)
return MODELS[model_name](
policy=policy,
env=env,
tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
verbose=verbose,
policy_kwargs=policy_kwargs,
seed=seed,
**temp_model_kwargs,
)
@staticmethod
def train_model(model, model_name, tb_log_name, iter_num, total_timesteps=5000):
model = model.learn(
total_timesteps=total_timesteps,
tb_log_name=tb_log_name,
callback=TensorboardCallback(),
)
model.save(
f"{config.TRAINED_MODEL_DIR}/{model_name.upper()}_{total_timesteps // 1000}k_{iter_num}"
)
return model
@staticmethod
def get_validation_sharpe(iteration, model_name):
"""Calculate Sharpe ratio based on validation results"""
df_total_value = pd.read_csv(
f"results/account_value_validation_{model_name}_{iteration}.csv"
)
# If the agent did not make any transaction
if df_total_value["daily_return"].var() == 0:
if df_total_value["daily_return"].mean() > 0:
return np.inf
else:
return 0.0
else:
return (
                (4**0.5)
* df_total_value["daily_return"].mean()
/ df_total_value["daily_return"].std()
)
def __init__(
self,
df,
train_period,
val_test_period,
rebalance_window,
validation_window,
stock_dim,
hmax,
initial_amount,
buy_cost_pct,
sell_cost_pct,
reward_scaling,
state_space,
action_space,
tech_indicator_list,
print_verbosity,
):
self.df = df
self.train_period = train_period
self.val_test_period = val_test_period
self.unique_trade_date = df[
(df.date > val_test_period[0]) & (df.date <= val_test_period[1])
].date.unique()
self.rebalance_window = rebalance_window
self.validation_window = validation_window
self.stock_dim = stock_dim
self.hmax = hmax
self.initial_amount = initial_amount
self.buy_cost_pct = buy_cost_pct
self.sell_cost_pct = sell_cost_pct
self.reward_scaling = reward_scaling
self.state_space = state_space
self.action_space = action_space
self.tech_indicator_list = tech_indicator_list
self.print_verbosity = print_verbosity
def DRL_validation(self, model, test_data, test_env, test_obs):
"""validation process"""
for _ in range(len(test_data.index.unique())):
action, _states = model.predict(test_obs)
test_obs, rewards, dones, info = test_env.step(action)
def DRL_prediction(
self, model, name, last_state, iter_num, turbulence_threshold, initial
):
"""make a prediction based on trained model"""
## trading env
trade_data = data_split(
self.df,
start=self.unique_trade_date[iter_num - self.rebalance_window],
end=self.unique_trade_date[iter_num],
)
trade_env = DummyVecEnv(
[
lambda: StockTradingEnv(
df=trade_data,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
initial=initial,
previous_state=last_state,
model_name=name,
mode="trade",
iteration=iter_num,
print_verbosity=self.print_verbosity,
)
]
)
trade_obs = trade_env.reset()
for i in range(len(trade_data.index.unique())):
action, _states = model.predict(trade_obs)
trade_obs, rewards, dones, info = trade_env.step(action)
if i == (len(trade_data.index.unique()) - 2):
# print(env_test.render())
last_state = trade_env.render()
df_last_state = pd.DataFrame({"last_state": last_state})
df_last_state.to_csv(f"results/last_state_{name}_{i}.csv", index=False)
return last_state
def run_ensemble_strategy(
        self, A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, SAC_model_kwargs, TD3_model_kwargs, timesteps_dict
):
"""Ensemble Strategy that combines PPO, A2C and DDPG"""
print("============Start Ensemble Strategy============")
# for ensemble model, it's necessary to feed the last state
# of the previous model to the current model as the initial state
last_state_ensemble = []
ppo_sharpe_list = []
ddpg_sharpe_list = []
a2c_sharpe_list = []
td3_sharpe_list = []
sac_sharpe_list = []
model_use = []
validation_start_date_list = []
validation_end_date_list = []
iteration_list = []
insample_turbulence = self.df[
(self.df.date < self.train_period[1])
& (self.df.date >= self.train_period[0])
]
insample_turbulence_threshold = np.quantile(
insample_turbulence.turbulence.values, 0.90
)
start = time.time()
for i in range(
self.rebalance_window + self.validation_window,
len(self.unique_trade_date),
self.rebalance_window,
):
validation_start_date = self.unique_trade_date[
i - self.rebalance_window - self.validation_window
]
validation_end_date = self.unique_trade_date[i - self.rebalance_window]
validation_start_date_list.append(validation_start_date)
validation_end_date_list.append(validation_end_date)
iteration_list.append(i)
print("============================================")
## initial state is empty
if i - self.rebalance_window - self.validation_window == 0:
                # initial state
initial = True
else:
# previous state
initial = False
            # Tune turbulence index based on historical data
# Turbulence lookback window is one quarter (63 days)
end_date_index = self.df.index[
self.df["date"]
== self.unique_trade_date[
i - self.rebalance_window - self.validation_window
]
].to_list()[-1]
start_date_index = end_date_index - 63 + 1
historical_turbulence = self.df.iloc[
start_date_index : (end_date_index + 1), :
]
historical_turbulence = historical_turbulence.drop_duplicates(
subset=["date"]
)
historical_turbulence_mean = np.mean(
historical_turbulence.turbulence.values
)
# print(historical_turbulence_mean)
if historical_turbulence_mean > insample_turbulence_threshold:
# if the mean of the historical data is greater than the 90% quantile of insample turbulence data
# then we assume that the current market is volatile,
# therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
# meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
turbulence_threshold = insample_turbulence_threshold
else:
# if the mean of the historical data is less than the 90% quantile of insample turbulence data
# then we tune up the turbulence_threshold, meaning we lower the risk
turbulence_threshold = np.quantile(
insample_turbulence.turbulence.values, 1
)
turbulence_threshold = np.quantile(
insample_turbulence.turbulence.values, 0.99
)
print("turbulence_threshold: ", turbulence_threshold)
############## Environment Setup starts ##############
## training env
train = data_split(
self.df,
start=self.train_period[0],
end=self.unique_trade_date[
i - self.rebalance_window - self.validation_window
],
)
self.train_env = DummyVecEnv(
[
lambda: StockTradingEnv(
df=train,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
print_verbosity=self.print_verbosity,
)
]
)
validation = data_split(
self.df,
start=self.unique_trade_date[
i - self.rebalance_window - self.validation_window
],
end=self.unique_trade_date[i - self.rebalance_window],
)
############## Environment Setup ends ##############
############## Training and Validation starts ##############
print(
"======Model training from: ",
self.train_period[0],
"to ",
self.unique_trade_date[
i - self.rebalance_window - self.validation_window
],
)
# print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
# print("==============Model Training===========")
print("======A2C Training========")
model_a2c = self.get_model(
"a2c", self.train_env, policy="MlpPolicy", model_kwargs=A2C_model_kwargs
)
model_a2c = self.train_model(
model_a2c,
"a2c",
tb_log_name=f"a2c_{i}",
iter_num=i,
total_timesteps=timesteps_dict["a2c"],
) # 100_000
print(
"======A2C Validation from: ",
validation_start_date,
"to ",
validation_end_date,
)
val_env_a2c = DummyVecEnv(
[
lambda: StockTradingEnv(
df=validation,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
iteration=i,
model_name="A2C",
mode="validation",
print_verbosity=self.print_verbosity,
)
]
)
val_obs_a2c = val_env_a2c.reset()
self.DRL_validation(
model=model_a2c,
test_data=validation,
test_env=val_env_a2c,
test_obs=val_obs_a2c,
)
sharpe_a2c = self.get_validation_sharpe(i, model_name="A2C")
print("A2C Sharpe Ratio: ", sharpe_a2c)
print("======PPO Training========")
model_ppo = self.get_model(
"ppo", self.train_env, policy="MlpPolicy", model_kwargs=PPO_model_kwargs
)
model_ppo = self.train_model(
model_ppo,
"ppo",
tb_log_name=f"ppo_{i}",
iter_num=i,
total_timesteps=timesteps_dict["ppo"],
) # 100_000
print(
"======PPO Validation from: ",
validation_start_date,
"to ",
validation_end_date,
)
val_env_ppo = DummyVecEnv(
[
lambda: StockTradingEnv(
df=validation,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
iteration=i,
model_name="PPO",
mode="validation",
print_verbosity=self.print_verbosity,
)
]
)
val_obs_ppo = val_env_ppo.reset()
self.DRL_validation(
model=model_ppo,
test_data=validation,
test_env=val_env_ppo,
test_obs=val_obs_ppo,
)
sharpe_ppo = self.get_validation_sharpe(i, model_name="PPO")
print("PPO Sharpe Ratio: ", sharpe_ppo)
print("======SAC Training========")
model_sac = self.get_model(
"sac", self.train_env, policy="MlpPolicy", model_kwargs=SAC_model_kwargs
)
model_sac = self.train_model(
model_sac,
"sac",
tb_log_name=f"sac_{i}",
iter_num=i,
total_timesteps=timesteps_dict["sac"],
) # 100_000
print(
"======SAC Validation from: ",
validation_start_date,
"to ",
validation_end_date,
)
val_env_sac = DummyVecEnv(
[
lambda: StockTradingEnv(
df=validation,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
iteration=i,
model_name="SAC",
mode="validation",
print_verbosity=self.print_verbosity,
)
]
)
val_obs_sac = val_env_sac.reset()
self.DRL_validation(
model=model_sac,
test_data=validation,
test_env=val_env_sac,
test_obs=val_obs_sac,
)
            sharpe_sac = self.get_validation_sharpe(i, model_name="SAC")
            print("SAC Sharpe Ratio: ", sharpe_sac)
print("======TD3 Training========")
model_td3 = self.get_model(
"td3", self.train_env, policy="MlpPolicy", model_kwargs=TD3_model_kwargs
)
model_td3 = self.train_model(
model_td3,
"td3",
tb_log_name=f"td3_{i}",
iter_num=i,
total_timesteps=timesteps_dict["td3"],
) # 100_000
print(
"======TD3 Validation from: ",
validation_start_date,
"to ",
validation_end_date,
)
val_env_td3 = DummyVecEnv(
[
lambda: StockTradingEnv(
df=validation,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
iteration=i,
model_name="TD3”,
mode="validation",
print_verbosity=self.print_verbosity,
)
]
)
val_obs_td3 = val_env_td3.reset()
self.DRL_validation(
model=model_td3,
test_data=validation,
test_env=val_env_td3,
test_obs=val_obs_td3,
)
            sharpe_td3 = self.get_validation_sharpe(i, model_name="TD3")
            print("TD3 Sharpe Ratio: ", sharpe_td3)
print("======DDPG Training========")
model_ddpg = self.get_model(
"ddpg",
self.train_env,
policy="MlpPolicy",
model_kwargs=DDPG_model_kwargs,
)
model_ddpg = self.train_model(
model_ddpg,
"ddpg",
tb_log_name=f"ddpg_{i}",
iter_num=i,
total_timesteps=timesteps_dict["ddpg"],
) # 50_000
print(
"======DDPG Validation from: ",
validation_start_date,
"to ",
validation_end_date,
)
val_env_ddpg = DummyVecEnv(
[
lambda: StockTradingEnv(
df=validation,
stock_dim=self.stock_dim,
hmax=self.hmax,
initial_amount=self.initial_amount,
num_stock_shares=[0] * self.stock_dim,
buy_cost_pct=[self.buy_cost_pct] * self.stock_dim,
sell_cost_pct=[self.sell_cost_pct] * self.stock_dim,
reward_scaling=self.reward_scaling,
state_space=self.state_space,
action_space=self.action_space,
tech_indicator_list=self.tech_indicator_list,
turbulence_threshold=turbulence_threshold,
iteration=i,
model_name="DDPG",
mode="validation",
print_verbosity=self.print_verbosity,
)
]
)
val_obs_ddpg = val_env_ddpg.reset()
self.DRL_validation(
model=model_ddpg,
test_data=validation,
test_env=val_env_ddpg,
test_obs=val_obs_ddpg,
)
sharpe_ddpg = self.get_validation_sharpe(i, model_name="DDPG")
ppo_sharpe_list.append(sharpe_ppo)
a2c_sharpe_list.append(sharpe_a2c)
sac_sharpe_list.append(sharpe_sac)
td3_sharpe_list.append(sharpe_td3)
ddpg_sharpe_list.append(sharpe_ddpg)
print(
"======Best Model Retraining from: ",
self.train_period[0],
"to ",
self.unique_trade_date[i - self.rebalance_window],
)
# Environment setup for model retraining up to first trade date
# train_full = data_split(self.df, start=self.train_period[0], end=self.unique_trade_date[i - self.rebalance_window])
# self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(train_full,
# self.stock_dim,
# self.hmax,
# self.initial_amount,
# self.buy_cost_pct,
# self.sell_cost_pct,
# self.reward_scaling,
# self.state_space,
# self.action_space,
# self.tech_indicator_list,
# print_verbosity=self.print_verbosity)])
# Model Selection based on sharpe ratio
if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg) & (sharpe_ppo >= sharpe_sac) & (sharpe_ppo >= sharpe_td3):
model_use.append("PPO")
model_ensemble = model_ppo
# model_ensemble = self.get_model("ppo",self.train_full_env,policy="MlpPolicy",model_kwargs=PPO_model_kwargs)
# model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ppo']) #100_000
            elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg) & (sharpe_a2c >= sharpe_sac) & (sharpe_a2c >= sharpe_td3):
model_use.append("A2C")
model_ensemble = model_a2c
# model_ensemble = self.get_model("a2c",self.train_full_env,policy="MlpPolicy",model_kwargs=A2C_model_kwargs)
# model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['a2c']) #100_000
            elif (sharpe_td3 > sharpe_ppo) & (sharpe_td3 > sharpe_ddpg) & (sharpe_td3 >= sharpe_sac) & (sharpe_td3 >= sharpe_a2c):
                model_use.append("TD3")
model_ensemble = model_td3
# model_ensemble = self.get_model("td3",self.train_full_env,policy="MlpPolicy",model_kwargs=TD3_model_kwargs)
# model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['td3']) #100_000
            elif (sharpe_sac > sharpe_ppo) & (sharpe_sac > sharpe_ddpg) & (sharpe_sac >= sharpe_td3) & (sharpe_sac >= sharpe_a2c):
                model_use.append("SAC")
model_ensemble = model_sac
# model_ensemble = self.get_model("sac",self.train_full_env,policy="MlpPolicy",model_kwargs=SAC_model_kwargs)
# model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['sac']) #100_000
else:
model_use.append("DDPG")
model_ensemble = model_ddpg
# model_ensemble = self.get_model("ddpg",self.train_full_env,policy="MlpPolicy",model_kwargs=DDPG_model_kwargs)
# model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num = i, total_timesteps=timesteps_dict['ddpg']) #50_000
############## Training and Validation ends ##############
############## Trading starts ##############
print(
"======Trading from: ",
self.unique_trade_date[i - self.rebalance_window],
"to ",
self.unique_trade_date[i],
)
# print("Used Model: ", model_ensemble)
last_state_ensemble = self.DRL_prediction(
model=model_ensemble,
name="ensemble",
last_state=last_state_ensemble,
iter_num=i,
turbulence_threshold=turbulence_threshold,
initial=initial,
)
############## Trading ends ##############
end = time.time()
print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
df_summary = pd.DataFrame(
[
iteration_list,
validation_start_date_list,
validation_end_date_list,
model_use,
a2c_sharpe_list,
ppo_sharpe_list,
ddpg_sharpe_list,
sac_sharpe_list,
td3_sharpe_list,
]
).T
df_summary.columns = [
"Iter",
"Val Start",
"Val End",
"Model Used",
"A2C Sharpe",
"PPO Sharpe",
"DDPG Sharpe",
"SAC Sharpe",
"TD3 Sharpe",
]
return df_summary
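# Illustrative end-to-end usage of the ensemble agent (a minimal sketch; every value below is a
# placeholder, and `processed_df` is assumed to be a preprocessed DataFrame with `date`, `tic`,
# price, technical-indicator, and `turbulence` columns):
#
#     ensemble_agent = DRLEnsembleAgent(
#         df=processed_df,
#         train_period=("2010-01-01", "2021-10-01"),
#         val_test_period=("2021-10-01", "2023-03-01"),
#         rebalance_window=63,
#         validation_window=63,
#         stock_dim=stock_dimension,
#         hmax=100,
#         initial_amount=1_000_000,
#         buy_cost_pct=0.001,
#         sell_cost_pct=0.001,
#         reward_scaling=1e-4,
#         state_space=state_space,
#         action_space=stock_dimension,
#         tech_indicator_list=tech_indicator_list,
#         print_verbosity=5,
#     )
#     df_summary = ensemble_agent.run_ensemble_strategy(
#         A2C_model_kwargs={"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0007},
#         PPO_model_kwargs={"ent_coef": 0.01, "n_steps": 2048, "learning_rate": 0.00025, "batch_size": 128},
#         DDPG_model_kwargs={"buffer_size": 10_000, "learning_rate": 0.0005, "batch_size": 64},
#         SAC_model_kwargs={"buffer_size": 100_000, "learning_rate": 0.0001, "batch_size": 64},
#         TD3_model_kwargs={"buffer_size": 10_000, "learning_rate": 0.0005, "batch_size": 64},
#         timesteps_dict={"a2c": 10_000, "ppo": 10_000, "ddpg": 10_000, "sac": 10_000, "td3": 10_000},
#     )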