1from gym.envs.mujoco import mujoco_env
2from gym import utils
3import numpy as np
4
5
6class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
7    def __init__(self):
8        mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
9        utils.EzPickle.__init__(self)
10
11    def _get_obs(self):
12        data = self.sim.data
13        return np.concatenate(
14            [
15                data.qpos.flat[2:],
16                data.qvel.flat,
17                data.cinert.flat,
18                data.cvel.flat,
19                data.qfrc_actuator.flat,
20                data.cfrc_ext.flat,
21            ]
22        )
23
24    def step(self, a):
25        self.do_simulation(a, self.frame_skip)
26        pos_after = self.sim.data.qpos[2]
27        data = self.sim.data
28        uph_cost = (pos_after - 0) / self.model.opt.timestep
29
30        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
31        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
32        quad_impact_cost = min(quad_impact_cost, 10)
33        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
34
35        done = bool(False)
36        return (
37            self._get_obs(),
38            reward,
39            done,
40            dict(
41                reward_linup=uph_cost,
42                reward_quadctrl=-quad_ctrl_cost,
43                reward_impact=-quad_impact_cost,
44            ),
45        )
46
47    def reset_model(self):
48        c = 0.01
49        self.set_state(
50            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
51            self.init_qvel
52            + self.np_random.uniform(
53                low=-c,
54                high=c,
55                size=self.model.nv,
56            ),
57        )
58        return self._get_obs()
59
60    def viewer_setup(self):
61        self.viewer.cam.trackbodyid = 1
62        self.viewer.cam.distance = self.model.stat.extent * 1.0
63        self.viewer.cam.lookat[2] = 0.8925
64        self.viewer.cam.elevation = -20
65