I'm trying to run the code by following the guide in the README, but I ran into an error. Something seems to be wrong with the multinomial distribution. What can I do to get the code running? Thank you.
Here are the versions of the packages that I use:
python == 3.6.15, torch == 1.4.0, tianshou == 0.3.0, pgmpy == 0.1.19
The detailed error information is listed below.
RuntimeError Traceback (most recent call last)
/data/mengtianxin/code/BR-Agent/Train/dx_train_norm.py in <module>
170 if __name__ == '__main__':
171 args1 = get_args()
--> 172 result = test_a2c(args1)
/data/mengtianxin/code/BR-Agent/Train/dx_train_norm.py in test_a2c(args)
162 policy, train_collector, test_collector, args.epoch,
163 args.step_per_epoch, args.collect_per_step, args.repeat_per_collect,
--> 164 len(goals['test']), args.batch_size, writer=writer, verbose=True, test_probs=False)
165 path = path + ' ' + str(result['best_rate'])+" mate_num_"+str(result['best_mate_num']) + " best_len_"+str(result['best_len'])
166 return result
/data/mengtianxin/code/BR-Agent/a2c/Policy.py in Myonpolicy_trainer(policy, train_collector, test_collector, max_epoch, step_per_epoch, collect_per_step, repeat_per_collect, episode_per_test, batch_size, train_fn, test_fn, stop_fn, save_fn, writer, log_interval, verbose, test_in_train, test_probs)
95 if train_fn:
96 train_fn(epoch, env_step)
---> 97 result = train_collector.collect(n_episode=collect_per_step)
98 env_step += int(result["n/st"])
99 data = {
/data/mengtianxin/code/BR-Agent/a2c/Collect.py in collect(self, n_step, n_episode, random, render, no_grad)
258 if no_grad:
259 with torch.no_grad(): # faster than retain_grad version
--> 260 result = self.policy(self.data, last_state)
261 else:
262 result = self.policy(self.data, last_state)
~/anaconda3/envs/BR_AGENT/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/data/mengtianxin/code/BR-Agent/a2c/A2C.py in forward(self, batch, state, **kwargs)
108 dist = self.dist_fn(probs=logits) # type: ignore
109 if self.training:
--> 110 act = dist.sample()
111 else:
112 act = torch.argmax(logits, -1)
~/anaconda3/envs/BR_AGENT/lib/python3.6/site-packages/torch/distributions/categorical.py in sample(self, sample_shape)
105 probs = self.probs.expand(param_shape)
106 probs_2d = probs.reshape(-1, self._num_events)
--> 107 sample_2d = torch.multinomial(probs_2d, 1, True)
108 return sample_2d.reshape(sample_shape)
109
RuntimeError: invalid multinomial distribution (encountering probability entry < 0)