/dports/math/mlpack/mlpack-3.4.2/src/mlpack/tests/ |
H A D | async_learning_test.cpp | 76 arma::vec rewards(20, arma::fill::zeros); variable 85 rewards[pos++] = reward; in __anonb38bb9700102() 86 pos %= rewards.n_elem; in __anonb38bb9700102() 99 double avgReward = arma::mean(rewards); 163 rewards[pos++] = reward; in __anonb38bb9700202() 164 pos %= rewards.n_elem; in __anonb38bb9700202() 177 double avgReward = arma::mean(rewards); 227 arma::vec rewards(20, arma::fill::zeros); variable 236 rewards[pos++] = reward; in __anonb38bb9700302() 237 pos %= rewards.n_elem; in __anonb38bb9700302() [all …]
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/example/gluon/actor_critic/ |
H A D | actor_critic.py | 70 rewards = [] variable 83 rewards.append(reward) 93 for i in range(len(rewards)-1, -1, -1): 94 R = rewards[i] + args.gamma * R 95 rewards[i] = R 96 rewards = np.array(rewards) variable 97 rewards -= rewards.mean() 98 rewards /= rewards.std() + np.finfo(rewards.dtype).eps 101 L = sum([loss(value, mx.nd.array([r])) for r, value in zip(rewards, values)]) 103 for logp, r, v in zip(heads, rewards, values):
|
/dports/misc/py-mxnet/incubator-mxnet-1.9.0/example/gluon/actor_critic/ |
H A D | actor_critic.py | 70 rewards = [] variable 83 rewards.append(reward) 93 for i in range(len(rewards)-1, -1, -1): 94 R = rewards[i] + args.gamma * R 95 rewards[i] = R 96 rewards = np.array(rewards) variable 97 rewards -= rewards.mean() 98 rewards /= rewards.std() + np.finfo(rewards.dtype).eps 101 L = sum([loss(value, mx.nd.array([r])) for r, value in zip(rewards, values)]) 103 for logp, r, v in zip(heads, rewards, values):
|
/dports/misc/mxnet/incubator-mxnet-1.9.0/example/reinforcement-learning/dqn/ |
H A D | replay_memory.py | 69 self.rewards[:] = 0 87 replay_memory.rewards = numpy.zeros(self.rewards.shape, dtype='float32') 102 self.rewards[self.top] = reward 117 rewards = numpy.empty(batch_size, dtype='float32') 130 rewards[counter] = self.rewards.take(end_index, mode='wrap') 134 return actions, rewards, terminate_flags 159 rewards[counter] = self.rewards.take(end_index, mode='wrap') 163 return actions, rewards, terminate_flags 188 rewards[counter] = self.rewards.take(end_index, mode='wrap') 192 return actions, rewards, terminate_flags [all …]
|
/dports/misc/py-mxnet/incubator-mxnet-1.9.0/example/reinforcement-learning/dqn/ |
H A D | replay_memory.py | 69 self.rewards[:] = 0 87 replay_memory.rewards = numpy.zeros(self.rewards.shape, dtype='float32') 102 self.rewards[self.top] = reward 117 rewards = numpy.empty(batch_size, dtype='float32') 130 rewards[counter] = self.rewards.take(end_index, mode='wrap') 134 return actions, rewards, terminate_flags 159 rewards[counter] = self.rewards.take(end_index, mode='wrap') 163 return actions, rewards, terminate_flags 188 rewards[counter] = self.rewards.take(end_index, mode='wrap') 192 return actions, rewards, terminate_flags [all …]
|
/dports/net/storj/storj-1.45.3/satellite/rewards/ |
H A D | partners_db_test.go | 19 world := rewards.PartnerInfo{ 24 hello := rewards.PartnerInfo{ 29 db, err := rewards.NewPartnersStaticDB(&rewards.PartnerList{ 30 Partners: []rewards.PartnerInfo{world, hello}, 49 require.EqualValues(t, []rewards.PartnerInfo{hello, world}, all)
|
/dports/science/pybrain/pybrain-0.3.3/pybrain/rl/learners/directsearch/ |
H A D | gpomdp.py | 26 _, _, rewards, loglhs = self.ds.getSequence(seq) 27 for t in range(len(rewards)): 28 …baselines[t, :] += mean(sum(loglhs[:t + 1, :], 0) ** 2 * rewards[t, :], 0) / mean(sum(loglhs[:t + … 34 _, _, rewards, loglhs = self.ds.getSequence(seq) 35 for t in range(len(rewards)): 36 g[seq, :] += sum(loglhs[:t + 1, :], 0) * (rewards[t, :] - baselines[t])
|
H A D | rwr.py | 99 rewards = [] 108 rewards.append(reward) 109 return rewards 143 acts, obss, rewards = [], [], [] 154 rewards.append(reward) 155 avgReward += sum(rewards) / float(len(rewards)) 160 for r in reversed(rewards):
|
/dports/devel/py-bullet3/bullet3-3.21/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ |
H A D | path.py | 21 valid &= len(self.rewards) == l 27 for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]: 38 self.rewards = [] 44 return len(self.rewards) 47 return sum(self.rewards)
|
H A D | rl_util.py | 4 def compute_return(rewards, gamma, td_lambda, val_t): argument 6 path_len = len(rewards) 10 last_val = rewards[-1] + gamma * val_t[-1] 14 curr_r = rewards[i]
|
/dports/science/chrono/chrono-7.0.1/src/demos/python/chrono-tensorflow/PPO/ |
H A D | train_serial.py | 70 observes, actions, rewards, unscaled_obs = [], [], [], [] 91 rewards.append(reward) 95 np.array(rewards, dtype=np.float64), np.concatenate(unscaled_obs)) 117 observes, actions, rewards, unscaled_obs = run_episode(env, policy, scaler) 121 'rewards': rewards, 149 rewards = trajectory['rewards'] * (1 - gamma) 151 rewards = trajectory['rewards'] 152 disc_sum_rew = discount(rewards, gamma) 190 rewards = trajectory['rewards'] * (1 - gamma) 192 rewards = trajectory['rewards'] [all …]
|
H A D | run_episode.py | 23 observes, actions, rewards, unscaled_obs = run_episode(env_c, policy, scaler, arg[3]) 27 'rewards': rewards, 49 observes, actions, rewards, unscaled_obs = [], [], [], [] 70 rewards.append(reward) 74 np.array(rewards, dtype=np.float64), np.concatenate(unscaled_obs))
|
H A D | train_parallel.py | 108 rewards = trajectory['rewards'] * (1 - gamma) 110 rewards = trajectory['rewards'] 111 disc_sum_rew = discount(rewards, gamma) 149 rewards = trajectory['rewards'] * (1 - gamma) 151 rewards = trajectory['rewards'] 156 tds = rewards - values + np.append(values[1:] * gamma, 0)
|
/dports/www/tikiwiki/tiki-21.2/lib/goal/ |
H A D | rewardlib.php | 95 function giveRewardsToUser($user, $rewards, $list = null) argument 101 foreach ($rewards as $reward) { 107 function giveRewardsToMembers($group, $rewards) argument 111 foreach ($rewards as $reward) { 120 $this->giveRewardsToUser($user, $rewards, $list);
|
/dports/www/tikiwiki/tiki-21.2/templates/goal/ |
H A D | render_rewards.tpl | 8 {foreach $rewards as $key => $reward} 20 <td colspan="1">{tr}No rewards yet!{/tr}</td> 25 <input type="hidden" name="rewards" value="{$rewards|json_encode|escape}">
|
H A D | edit_reward.tpl | 9 {tr}Your changes to rewards are not saved until you save the goal.{/tr} 16 {foreach $rewards as $key => $info} 22 {if $rewards.credit} 27 {foreach $rewards.credit.options as $creditType => $creditLabel} 40 {if $rewards.tracker_badge_add} 44 …kerItemBadge _value="trackeritem:`$reward.trackerItemBadge`" tracker_id=$rewards.tracker_badge_add…
|
/dports/www/tikiwiki/tiki-21.2/lib/core/Services/Goal/ |
H A D | Controller.php | 194 $rewards = json_decode($input->rewards->none(), true); 195 if (is_array($rewards)) { 197 $goal['rewards'] = $rewards; 316 $rewards = json_decode($input->rewards->none(), true); 318 if (! is_array($rewards)) { 324 'rewards' => array_values(array_filter($rewards)),
|
/dports/games/lordsawar/lordsawar-0.3.2/src/ |
H A D | rewardlist.cpp | 175 std::vector<Reward*> rewards; in pop() local 179 rewards.push_back(*iter); in pop() 181 if (rewards.size()) in pop() 183 Reward *newReward = rewards[Rnd::rand() % rewards.size()]; in pop()
|
/dports/science/pybrain/pybrain-0.3.3/pybrain/rl/experiments/ |
H A D | episodic.py | 38 rewards = [] 43 rewards.append(r) 44 all_rewards.append(rewards)
|
/dports/devel/py-bullet3/bullet3-3.21/examples/pybullet/gym/pybullet_envs/ |
H A D | gym_manipulator_envs.py | 32 self.rewards = [ 38 return state, sum(self.rewards), False, {} 88 self.rewards = [ 94 return state, sum(self.rewards), False, {} 169 self.rewards = [ 175 return state, sum(self.rewards), False, {} 242 self.rewards = [ 248 return state, sum(self.rewards), False, {}
|
H A D | gym_pendulum_envs.py | 41 self.rewards = [float(reward)] 43 return state, sum(self.rewards), done, {} 87 self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)] 89 return state, sum(self.rewards), done, {}
|
/dports/math/py-gym/gym-0.21.0/gym/wrappers/monitoring/ |
H A D | stats_recorder.py | 25 self.rewards = None 65 self.rewards += reward 92 self.rewards = 0 101 self.episode_rewards.append(float(self.rewards))
|
/dports/math/mlpack/mlpack-3.4.2/src/mlpack/methods/reinforcement_learning/replay/ |
H A D | random_replay.hpp | 89 rewards(capacity), in RandomReplay() 132 rewards(position) = reward; in Store() 196 sampledRewards = rewards.elem(sampledIndices).t(); in Sample() 256 arma::rowvec rewards; member in mlpack::rl::RandomReplay
|
/dports/games/widelands/widelands-build21/data/maps/MP_Scenarios/Island_Hopping.wmf/scripting/ |
H A D | first_island.lua | 23 local rewards = _finish_rewards[island_idx][rank] 26 finished_island_continues:format(format_rewards(rewards)) 30 add_wares(new_hq, rewards)
|
/dports/math/py-gym/gym-0.21.0/gym/wrappers/ |
H A D | record_episode_statistics.py | 26 observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step( 29 self.episode_returns += rewards 52 rewards,
|