I know that the neural network is defined in model.py as the LSTMPolicy class. I am tr

Here is what I have tried:

Solved. This is the correct def act:

How to modify network architecture about universe-starter-agent HOT 2 CLOSED

openai commented on September 7, 2024

How to modify network architecture

from universe-starter-agent.

Comments (2)

hmate9 commented on September 7, 2024

Here is what I have tried:

class DensePolicy(object):
    """Policy whose action and value heads are built by ``linear`` on the flattened observation.

    NOTE(review): this is the version posted as the failing attempt. Its
    ``act`` fetches ``self.logits``; the follow-up comment in this thread
    reports that fetching ``self.vf`` instead resolves the failure.
    """

    def __init__(self, ob_space, ac_space):
        """Build the placeholder, both heads, the sampling op and the variable list.

        Args:
            ob_space: iterable of observation dimensions; it is converted with
                ``list()`` and appended after a leading ``None`` dimension.
            ac_space: output size passed to the "action" head and to
                ``categorical_sample`` (presumably the number of discrete
                actions — TODO confirm against the caller).
        """
        # The input is the observed state; the leading None leaves the first
        # (batch) dimension unspecified.
        self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))
        # The observation space may be multi-D, so flatten it
        # (`flatten` is defined elsewhere; presumably it keeps the batch
        # dimension — TODO confirm).
        x = flatten(x)
        # Add the output for getting the action: a head named "action" with
        # `ac_space` outputs.
        self.logits = linear(x, ac_space, "action", normalized_columns_initializer(0.01))
        # Now for getting the value: a head named "value" with one output,
        # reshaped to a 1-D tensor.
        self.vf = tf.reshape(linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
        # `categorical_sample` is defined elsewhere; presumably it samples an
        # action from the logits and one-hot encodes it — TODO confirm.
        # The `[0, :]` index keeps only the first row of its result.
        self.sample = categorical_sample(self.logits, ac_space)[0, :]

        # Collect the trainable variables registered under the name of the
        # current variable scope.
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

    def get_initial_features(self):
        """Return an empty list."""
        # TODO: confirm what the caller expects from this function.
        # NOTE(review): the traceback in this thread shows the runner calling
        # `policy.act(last_state, *last_features)`; if `last_features` starts
        # from this return value, an empty list means `act` receives only the
        # observation — verify against a3c.py.
        return []

    # Run the observation through the network to determine the action
    def act(self, ob):
        """Evaluate ``self.sample`` and ``self.logits`` for one observation.

        Args:
            ob: a single observation; it is wrapped in a list before being fed
                to ``self.x``, i.e. fed as a batch of one.

        Returns:
            The result of ``sess.run`` on the two-element fetch list
            ``[self.sample, self.logits]``.
        """
        sess = tf.get_default_session()
        # Debug output: reads the session's private `_closed` attribute.
        print("Closed:", sess._closed)
        # `+ []` concatenates an empty list and leaves the fetch list unchanged.
        # NOTE(review): the second fetch here is `self.logits`; the fix posted
        # later in this thread fetches `self.vf` instead.
        return sess.run([self.sample, self.logits] + [], 
            feed_dict={self.x: [ob]})

    def value(self, ob):
        """Evaluate ``self.vf`` for one observation, fed as a batch of one.

        Args:
            ob: a single observation; it is wrapped in a list before being fed
                to ``self.x``.

        Returns:
            The result of ``sess.run(self.vf, ...)`` on the default session.
        """
        sess = tf.get_default_session()
        return sess.run(self.vf, 
            feed_dict={self.x: [ob]})

However, after around 8-10 steps, TensorFlow quits, saying that it is trying to use a session that is already closed:

[2016-12-10 20:22:05,428] Initializing all parameters.
[2016-12-10 20:22:06,140] Resetting environment
[2016-12-10 20:22:06,140] Starting training at step=0
Closed: False
Closed: False
Closed: False
Closed: False
[2016-12-10 20:22:06,153] Episode terminating: episode_reward=-10 episode_length=4
[2016-12-10 20:22:06,155] Resetting environment
Episode finished. Sum of rewards: -10. Length: 4
Closed: False
Closed: False
Closed: False
Closed: False
[2016-12-10 20:22:06,162] Episode terminating: episode_reward=-10 episode_length=4
[2016-12-10 20:22:06,164] Resetting environment
Episode finished. Sum of rewards: -10. Length: 4
Closed: True
Exception in thread Thread-1:
Traceback (most recent call last):
  File "/usr/local/Cellar/python3/3.5.2_3/Frameworks/Python.framework/Versions/3.5/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/Users/matehegedus/Downloads/universe-starter-agent/a3c.py", line 92, in run
    self._run()
  File "/usr/local/Cellar/python3/3.5.2_3/Frameworks/Python.framework/Versions/3.5/lib/python3.5/contextlib.py", line 77, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3517, in get_controller
    yield default
  File "/Users/matehegedus/Downloads/universe-starter-agent/a3c.py", line 92, in run
    self._run()
  File "/Users/matehegedus/Downloads/universe-starter-agent/a3c.py", line 101, in _run
    self.queue.put(next(rollout_provider), timeout=600.0)
  File "/Users/matehegedus/Downloads/universe-starter-agent/a3c.py", line 122, in env_runner
    fetched = policy.act(last_state, *last_features)
  File "/Users/matehegedus/Downloads/universe-starter-agent/model.py", line 70, in act
    feed_dict={self.x: [ob]})
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 766, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 902, in _run
    raise RuntimeError('Attempted to use a closed Session.')
RuntimeError: Attempted to use a closed Session.

Traceback (most recent call last):
  File "worker.py", line 122, in <module>
    tf.app.run()
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/platform/app.py", line 43, in run
    sys.exit(main(sys.argv[:1] + flags_passthrough))
  File "worker.py", line 114, in main
    run(args, server)
  File "worker.py", line 61, in run
    trainer.process(sess)
  File "/Users/matehegedus/Downloads/universe-starter-agent/a3c.py", line 278, in process
    fetched = sess.run(fetches, feed_dict=feed_dict)
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 766, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 937, in _run
    np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "/usr/local/lib/python3.5/site-packages/numpy/core/numeric.py", line 482, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.

from universe-starter-agent.

hmate9 commented on September 7, 2024

Solved. This is the corrected `act` method (it fetches `self.vf` instead of `self.logits`):

def act(self, ob):
        """Evaluate ``self.sample`` and ``self.vf`` for one observation.

        This is the version reported in this thread as resolving the failure;
        it differs from the earlier attempt only in fetching ``self.vf``
        where that one fetched ``self.logits``.

        Args:
            ob: a single observation; it is wrapped in a list before being fed
                to ``self.x``, i.e. fed as a batch of one.

        Returns:
            The result of ``sess.run`` on the two-element fetch list
            ``[self.sample, self.vf]``.
        """
        sess = tf.get_default_session()
        # Debug output: reads the session's private `_closed` attribute.
        print("Closed:", sess._closed)
        # `+ []` concatenates an empty list and leaves the fetch list unchanged.
        return sess.run([self.sample, self.vf] + [], 
            feed_dict={self.x: [ob]})

from universe-starter-agent.

How to modify network architecture about universe-starter-agent HOT 2 CLOSED

Comments (2)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent