import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        # frame_skip=5: each env.step() advances the MuJoCo simulation by 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5)
        utils.EzPickle.__init__(self)

    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        # Reward: forward (x-axis) velocity minus a quadratic control cost.
        reward_ctrl = -0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore) / self.dt
        reward = reward_ctrl + reward_run
        # The episode never terminates early; horizon is enforced by a TimeLimit wrapper.
        done = False
        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)

    def _get_obs(self):
        # Observation: joint positions (excluding the root x coordinate, so the
        # policy is invariant to absolute position) plus all joint velocities.
        return np.concatenate(
            [
                self.sim.data.qpos.flat[1:],
                self.sim.data.qvel.flat,
            ]
        )

    def reset_model(self):
        # Perturb the initial state: uniform noise on positions, Gaussian noise on velocities.
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        # Zoom the camera in relative to the model's overall extent.
        self.viewer.cam.distance = self.model.stat.extent * 0.5
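

# A minimal usage sketch (an illustrative addition, not part of the environment
# itself): it assumes mujoco-py is installed and uses the old gym step API
# (obs, reward, done, info), which matches the gym versions this file targets.
if __name__ == "__main__":
    env = HalfCheetahEnv()
    obs = env.reset()
    total_reward = 0.0
    for _ in range(100):
        # Sample a random action from the Box action space and step the simulation.
        obs, reward, done, info = env.step(env.action_space.sample())
        total_reward += reward
    print("return over 100 random steps:", total_reward)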