
Searched refs:rewards (Results 1 – 25 of 948) sorted by relevance


/dports/math/mlpack/mlpack-3.4.2/src/mlpack/tests/
async_learning_test.cpp 76 arma::vec rewards(20, arma::fill::zeros); variable
85 rewards[pos++] = reward; in __anonb38bb9700102()
86 pos %= rewards.n_elem; in __anonb38bb9700102()
99 double avgReward = arma::mean(rewards);
163 rewards[pos++] = reward; in __anonb38bb9700202()
164 pos %= rewards.n_elem; in __anonb38bb9700202()
177 double avgReward = arma::mean(rewards);
227 arma::vec rewards(20, arma::fill::zeros); variable
236 rewards[pos++] = reward; in __anonb38bb9700302()
237 pos %= rewards.n_elem; in __anonb38bb9700302()
[all …]
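The mlpack test hits above keep a fixed window of the last 20 episode rewards, overwrite it circularly (`pos %= rewards.n_elem`), and average it with `arma::mean`. A minimal Python sketch of that rolling-average pattern (the original is C++/Armadillo; the function names here are illustrative only):

```python
import numpy as np

# Fixed-size buffer of the most recent episode rewards, overwritten circularly.
rewards = np.zeros(20)
pos = 0

def record(reward):
    """Store one episode reward, wrapping around once the buffer is full."""
    global pos
    rewards[pos] = reward
    pos = (pos + 1) % rewards.size

def average_reward():
    """Mean over the window (unfilled slots contribute zeros, as in the test)."""
    return float(rewards.mean())
```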
/dports/misc/mxnet/incubator-mxnet-1.9.0/example/gluon/actor_critic/
actor_critic.py 70 rewards = [] variable
83 rewards.append(reward)
93 for i in range(len(rewards)-1, -1, -1):
94 R = rewards[i] + args.gamma * R
95 rewards[i] = R
96 rewards = np.array(rewards) variable
97 rewards -= rewards.mean()
98 rewards /= rewards.std() + np.finfo(rewards.dtype).eps
101 L = sum([loss(value, mx.nd.array([r])) for r, value in zip(rewards, values)])
103 for logp, r, v in zip(heads, rewards, values):
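Lines 93–98 of the actor_critic.py hit above compute discounted returns by walking the reward list backwards and then standardize them before the loss is formed. A self-contained sketch of that step (the function name is illustrative):

```python
import numpy as np

def discounted_normalized_returns(rewards, gamma):
    """R_t = r_t + gamma * R_{t+1}, computed backwards, then standardized."""
    R = 0.0
    returns = np.zeros(len(rewards), dtype=np.float64)
    for i in range(len(rewards) - 1, -1, -1):
        R = rewards[i] + gamma * R
        returns[i] = R
    return (returns - returns.mean()) / (returns.std() + np.finfo(returns.dtype).eps)
```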
/dports/misc/py-mxnet/incubator-mxnet-1.9.0/example/gluon/actor_critic/
actor_critic.py 70 rewards = [] variable
83 rewards.append(reward)
93 for i in range(len(rewards)-1, -1, -1):
94 R = rewards[i] + args.gamma * R
95 rewards[i] = R
96 rewards = np.array(rewards) variable
97 rewards -= rewards.mean()
98 rewards /= rewards.std() + np.finfo(rewards.dtype).eps
101 L = sum([loss(value, mx.nd.array([r])) for r, value in zip(rewards, values)])
103 for logp, r, v in zip(heads, rewards, values):
/dports/misc/mxnet/incubator-mxnet-1.9.0/example/reinforcement-learning/dqn/
replay_memory.py 69 self.rewards[:] = 0
87 replay_memory.rewards = numpy.zeros(self.rewards.shape, dtype='float32')
102 self.rewards[self.top] = reward
117 rewards = numpy.empty(batch_size, dtype='float32')
130 rewards[counter] = self.rewards.take(end_index, mode='wrap')
134 return actions, rewards, terminate_flags
159 rewards[counter] = self.rewards.take(end_index, mode='wrap')
163 return actions, rewards, terminate_flags
188 rewards[counter] = self.rewards.take(end_index, mode='wrap')
192 return actions, rewards, terminate_flags
[all …]
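The replay_memory.py hits above write each reward at a moving `top` index and read batches back with wrap-around indexing (`take(..., mode='wrap')`). A small sketch of that circular-buffer idea, assuming a reward-only buffer; the class and method names are hypothetical:

```python
import numpy as np

class TinyReplay:
    """Illustrative circular reward buffer in the spirit of replay_memory.py above."""
    def __init__(self, capacity):
        self.rewards = np.zeros(capacity, dtype='float32')
        self.top = 0      # next write position
        self.size = 0     # number of valid entries

    def append(self, reward):
        self.rewards[self.top] = reward
        self.top = (self.top + 1) % len(self.rewards)
        self.size = min(self.size + 1, len(self.rewards))

    def sample_rewards(self, batch_size):
        idx = np.random.randint(0, self.size, batch_size)
        # mode='wrap' mirrors the self.rewards.take(end_index, mode='wrap') reads above
        return self.rewards.take(idx, mode='wrap')
```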
/dports/misc/py-mxnet/incubator-mxnet-1.9.0/example/reinforcement-learning/dqn/
replay_memory.py 69 self.rewards[:] = 0
87 replay_memory.rewards = numpy.zeros(self.rewards.shape, dtype='float32')
102 self.rewards[self.top] = reward
117 rewards = numpy.empty(batch_size, dtype='float32')
130 rewards[counter] = self.rewards.take(end_index, mode='wrap')
134 return actions, rewards, terminate_flags
159 rewards[counter] = self.rewards.take(end_index, mode='wrap')
163 return actions, rewards, terminate_flags
188 rewards[counter] = self.rewards.take(end_index, mode='wrap')
192 return actions, rewards, terminate_flags
[all …]
/dports/net/storj/storj-1.45.3/satellite/rewards/
partners_db_test.go 19 world := rewards.PartnerInfo{
24 hello := rewards.PartnerInfo{
29 db, err := rewards.NewPartnersStaticDB(&rewards.PartnerList{
30 Partners: []rewards.PartnerInfo{world, hello},
49 require.EqualValues(t, []rewards.PartnerInfo{hello, world}, all)
/dports/science/pybrain/pybrain-0.3.3/pybrain/rl/learners/directsearch/
gpomdp.py 26 _, _, rewards, loglhs = self.ds.getSequence(seq)
27 for t in range(len(rewards)):
28 …baselines[t, :] += mean(sum(loglhs[:t + 1, :], 0) ** 2 * rewards[t, :], 0) / mean(sum(loglhs[:t + …
34 _, _, rewards, loglhs = self.ds.getSequence(seq)
35 for t in range(len(rewards)):
36 g[seq, :] += sum(loglhs[:t + 1, :], 0) * (rewards[t, :] - baselines[t])
rwr.py 99 rewards = []
108 rewards.append(reward)
109 return rewards
143 acts, obss, rewards = [], [], []
154 rewards.append(reward)
155 avgReward += sum(rewards) / float(len(rewards))
160 for r in reversed(rewards):
/dports/devel/py-bullet3/bullet3-3.21/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/
path.py 21 valid &= len(self.rewards) == l
27 for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
38 self.rewards = []
44 return len(self.rewards)
47 return sum(self.rewards)
rl_util.py 4 def compute_return(rewards, gamma, td_lambda, val_t): argument
6 path_len = len(rewards)
10 last_val = rewards[-1] + gamma * val_t[-1]
14 curr_r = rewards[i]
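The rl_util.py hit above shows only fragments of `compute_return` (the bootstrap of the final step and the per-step reward read). The body is elided in the listing; the following is a hedged reconstruction of a standard TD(λ) return with the same signature, not the original code:

```python
import numpy as np

def compute_return(rewards, gamma, td_lambda, val_t):
    """Illustrative TD(lambda) return:
    G_t = r_t + gamma * ((1 - lambda) * V_{t+1} + lambda * G_{t+1})."""
    path_len = len(rewards)
    returns = np.zeros(path_len)
    returns[-1] = rewards[-1] + gamma * val_t[-1]   # bootstrap the final step
    for i in reversed(range(path_len - 1)):
        returns[i] = rewards[i] + gamma * ((1.0 - td_lambda) * val_t[i + 1]
                                           + td_lambda * returns[i + 1])
    return returns
```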
/dports/science/chrono/chrono-7.0.1/src/demos/python/chrono-tensorflow/PPO/
train_serial.py 70 observes, actions, rewards, unscaled_obs = [], [], [], []
91 rewards.append(reward)
95 np.array(rewards, dtype=np.float64), np.concatenate(unscaled_obs))
117 observes, actions, rewards, unscaled_obs = run_episode(env, policy, scaler)
121 'rewards': rewards,
149 rewards = trajectory['rewards'] * (1 - gamma)
151 rewards = trajectory['rewards']
152 disc_sum_rew = discount(rewards, gamma)
190 rewards = trajectory['rewards'] * (1 - gamma)
192 rewards = trajectory['rewards']
[all …]
run_episode.py 23 observes, actions, rewards, unscaled_obs = run_episode(env_c, policy, scaler, arg[3])
27 'rewards': rewards,
49 observes, actions, rewards, unscaled_obs = [], [], [], []
70 rewards.append(reward)
74 np.array(rewards, dtype=np.float64), np.concatenate(unscaled_obs))
train_parallel.py 108 rewards = trajectory['rewards'] * (1 - gamma)
110 rewards = trajectory['rewards']
111 disc_sum_rew = discount(rewards, gamma)
149 rewards = trajectory['rewards'] * (1 - gamma)
151 rewards = trajectory['rewards']
156 tds = rewards - values + np.append(values[1:] * gamma, 0)
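The PPO training scripts above optionally scale rewards by (1 - gamma), build a discounted reward sum with `discount(rewards, gamma)`, and form one-step TD errors for the advantage estimate. A small sketch of those two helpers under that reading (the loop-based `discount` is only an illustrative stand-in for whatever the original uses):

```python
import numpy as np

def discount(x, gamma):
    """Discounted cumulative sum, as used for disc_sum_rew above."""
    out = np.zeros(len(x), dtype=np.float64)
    running = 0.0
    for i in reversed(range(len(x))):
        running = x[i] + gamma * running
        out[i] = running
    return out

def td_residuals(rewards, values, gamma):
    """One-step TD errors r_t + gamma * V(s_{t+1}) - V(s_t); the value after the
    final step is taken as 0, matching np.append(values[1:] * gamma, 0) above."""
    rewards = np.asarray(rewards, dtype=np.float64)
    values = np.asarray(values, dtype=np.float64)
    return rewards - values + np.append(values[1:] * gamma, 0)
```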
/dports/www/tikiwiki/tiki-21.2/lib/goal/
rewardlib.php 95 function giveRewardsToUser($user, $rewards, $list = null) argument
101 foreach ($rewards as $reward) {
107 function giveRewardsToMembers($group, $rewards) argument
111 foreach ($rewards as $reward) {
120 $this->giveRewardsToUser($user, $rewards, $list);
/dports/www/tikiwiki/tiki-21.2/templates/goal/
render_rewards.tpl 8 {foreach $rewards as $key => $reward}
20 <td colspan="1">{tr}No rewards yet!{/tr}</td>
25 <input type="hidden" name="rewards" value="{$rewards|json_encode|escape}">
edit_reward.tpl 9 {tr}Your changes to rewards are not saved until you save the goal.{/tr}
16 {foreach $rewards as $key => $info}
22 {if $rewards.credit}
27 {foreach $rewards.credit.options as $creditType => $creditLabel}
40 {if $rewards.tracker_badge_add}
44 …kerItemBadge _value="trackeritem:`$reward.trackerItemBadge`" tracker_id=$rewards.tracker_badge_add…
/dports/www/tikiwiki/tiki-21.2/lib/core/Services/Goal/
Controller.php 194 $rewards = json_decode($input->rewards->none(), true);
195 if (is_array($rewards)) {
197 $goal['rewards'] = $rewards;
316 $rewards = json_decode($input->rewards->none(), true);
318 if (! is_array($rewards)) {
324 'rewards' => array_values(array_filter($rewards)),
/dports/games/lordsawar/lordsawar-0.3.2/src/
rewardlist.cpp 175 std::vector<Reward*> rewards; in pop() local
179 rewards.push_back(*iter); in pop()
181 if (rewards.size()) in pop()
183 Reward *newReward = rewards[Rnd::rand() % rewards.size()]; in pop()
/dports/science/pybrain/pybrain-0.3.3/pybrain/rl/experiments/
episodic.py 38 rewards = []
43 rewards.append(r)
44 all_rewards.append(rewards)
/dports/devel/py-bullet3/bullet3-3.21/examples/pybullet/gym/pybullet_envs/
gym_manipulator_envs.py 32 self.rewards = [
38 return state, sum(self.rewards), False, {}
88 self.rewards = [
94 return state, sum(self.rewards), False, {}
169 self.rewards = [
175 return state, sum(self.rewards), False, {}
242 self.rewards = [
248 return state, sum(self.rewards), False, {}
gym_pendulum_envs.py 41 self.rewards = [float(reward)]
43 return state, sum(self.rewards), done, {}
87 self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
89 return state, sum(self.rewards), done, {}
/dports/math/py-gym/gym-0.21.0/gym/wrappers/monitoring/
stats_recorder.py 25 self.rewards = None
65 self.rewards += reward
92 self.rewards = 0
101 self.episode_rewards.append(float(self.rewards))
/dports/math/mlpack/mlpack-3.4.2/src/mlpack/methods/reinforcement_learning/replay/
random_replay.hpp 89 rewards(capacity), in RandomReplay()
132 rewards(position) = reward; in Store()
196 sampledRewards = rewards.elem(sampledIndices).t(); in Sample()
256 arma::rowvec rewards; member in mlpack::rl::RandomReplay
/dports/games/widelands/widelands-build21/data/maps/MP_Scenarios/Island_Hopping.wmf/scripting/
first_island.lua 23 local rewards = _finish_rewards[island_idx][rank]
26 finished_island_continues:format(format_rewards(rewards))
30 add_wares(new_hq, rewards)
/dports/math/py-gym/gym-0.21.0/gym/wrappers/
record_episode_statistics.py 26 observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step(
29 self.episode_returns += rewards
52 rewards,
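The gym wrapper hits above (stats_recorder.py and record_episode_statistics.py) both accumulate the per-step reward into a running episode return. A hypothetical, simplified wrapper in that spirit, written against the classic gym 0.21 step API; the class is not part of gym itself:

```python
import gym

class EpisodeReturn(gym.Wrapper):
    """Accumulate the per-episode return and report it when the episode ends."""
    def reset(self, **kwargs):
        self.episode_return = 0.0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.episode_return += reward
        if done:
            info = dict(info)  # avoid mutating the wrapped env's info dict
            info["episode_return"] = self.episode_return
        return obs, reward, done, info
```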
