from gym.envs.mujoco import mujoco_env
from gym import utils
import numpy as np


class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo humanoid stand-up task.

    The humanoid starts lying down (pose from ``humanoidstandup.xml``) and is
    rewarded for raising the root body's height as quickly as possible while
    keeping control effort and contact impacts small.
    """

    def __init__(self):
        # frame_skip=5: each env step advances the physics by 5 sim steps.
        mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        """Return the flat observation vector.

        Concatenates generalized positions (root x/y dropped so the
        observation is translation-invariant in the plane), generalized
        velocities, body inertias, body velocities, actuator forces and
        external contact forces.
        """
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` sim steps.

        Returns ``(obs, reward, done, info)``; ``done`` is always False —
        episode length is bounded externally (e.g. by a TimeLimit wrapper).
        """
        self.do_simulation(a, self.frame_skip)
        data = self.sim.data  # bind once; pos_after reads through the same alias
        pos_after = data.qpos[2]  # root height after the step

        # Reward: height gained per unit of (single-step) sim time, minus a
        # quadratic control-effort penalty and a capped contact-impact
        # penalty, plus a constant +1 alive bonus.
        uph_cost = (pos_after - 0) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = min(0.5e-6 * np.square(data.cfrc_ext).sum(), 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        done = False
        return (
            self._get_obs(),
            reward,
            done,
            dict(
                reward_linup=uph_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        """Reset to the initial lying pose with small uniform noise on all
        positions and velocities, and return the first observation."""
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv),
        )
        return self._get_obs()

    def viewer_setup(self):
        """Configure the camera to track body 1 at a fixed distance/angle."""
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20