Agent57 light

class srl.algorithms.agent57_light.agent57_light.Config(framework: ~typing.Literal['auto', 'tensorflow', 'torch'] = 'auto', observation_mode: ~typing.Literal['', 'render_image'] = '', override_env_observation_type: ~srl.base.define.SpaceTypes = SpaceTypes.UNKNOWN, override_observation_type: str | ~srl.base.define.RLBaseTypes = <RLBaseTypes.NONE: 1>, override_action_type: str | ~srl.base.define.RLBaseTypes = <RLBaseTypes.NONE: 1>, action_division_num: int = 10, observation_division_num: int = 1000, frameskip: int = 0, extend_worker: ~typing.Type[ExtendWorker] | None = None, processors: ~typing.List[RLProcessor] = <factory>, render_image_processors: ~typing.List[RLProcessor] = <factory>, enable_rl_processors: bool = True, enable_state_encode: bool = True, enable_action_decode: bool = True, window_length: int = 1, render_image_window_length: int = 1, render_last_step: bool = True, render_rl_image: bool = True, render_rl_image_size: ~typing.Tuple[int, int] = (128, 128), enable_sanitize: bool = True, enable_assertion: bool = False, dtype: str = 'float32', test_epsilon: float = 0, test_beta: float = 0, batch_size: int = 32, memory: ~srl.rl.memories.priority_replay_buffer.PriorityReplayBufferConfig = <factory>, lr_ext: float = 0.0001, lr_ext_scheduler: ~srl.rl.schedulers.lr_scheduler.LRSchedulerConfig = <factory>, lr_int: float = 0.0001, lr_int_scheduler: ~srl.rl.schedulers.lr_scheduler.LRSchedulerConfig = <factory>, target_model_update_interval: int = 1500, enable_double_dqn: bool = True, enable_rescale: bool = False, input_value_block: ~srl.rl.models.config.input_value_block.InputValueBlockConfig = <factory>, input_image_block: ~srl.rl.models.config.input_image_block.InputImageBlockConfig = <factory>, actor_num: int = 32, ucb_window_size: int = 3600, ucb_epsilon: float = 0.01, ucb_beta: float = 1, enable_intrinsic_reward: bool = True, episodic_lr: float = 0.0005, episodic_lr_scheduler: ~srl.rl.schedulers.lr_scheduler.LRSchedulerConfig = <factory>, episodic_count_max: int = 10, episodic_epsilon: float = 0.001, episodic_cluster_distance: float = 0.008, episodic_memory_capacity: int = 30000, episodic_pseudo_counts: float = 0.1, lifelong_lr: float = 0.0005, lifelong_lr_scheduler: ~srl.rl.schedulers.lr_scheduler.LRSchedulerConfig = <factory>, lifelong_max: float = 5.0, input_ext_reward: bool = True, input_int_reward: bool = False, input_action: bool = False, disable_int_priority: bool = False, dummy_state_val: float = 0.0)

<RLConfigComponentFramework>
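
A minimal usage sketch (assuming the standard srl.Runner training API; the environment name "CartPole-v1" and the max_train_count argument are examples, not requirements of this algorithm):

    import srl
    from srl.algorithms import agent57_light

    # Unspecified fields keep the defaults listed below.
    rl_config = agent57_light.Config(
        batch_size=32,
        target_model_update_interval=1500,
    )

    runner = srl.Runner("CartPole-v1", rl_config)
    runner.train(max_train_count=10_000)  # assumed Runner argument
    print(runner.evaluate())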

test_epsilon: float = 0

ε-greedy parameter used during testing (evaluation)

test_beta: float = 0

Intrinsic reward ratio (β) used during testing (evaluation)

batch_size: int = 32

Batch size

memory: PriorityReplayBufferConfig

<PriorityReplayBuffer>
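
The replay buffer is configured through this nested config object; a short sketch (the capacity field name is an assumption here, see the <PriorityReplayBuffer> reference for the actual options):

    rl_config = agent57_light.Config(batch_size=64)
    rl_config.memory.capacity = 100_000  # assumed field; priority settings live on this object too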

lr_ext: float = 0.0001

Extrinsic network learning rate

lr_ext_scheduler: LRSchedulerConfig

<LRScheduler>

lr_int: float = 0.0001

Intrinsic network learning rate

lr_int_scheduler: LRSchedulerConfig

<LRScheduler>
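
The extrinsic and intrinsic Q-networks have separate learning rates and schedulers; for example:

    rl_config = agent57_light.Config(
        lr_ext=1e-4,  # extrinsic Q-network
        lr_int=1e-4,  # intrinsic Q-network
    )
    # Scheduling is configured on the nested LRSchedulerConfig objects
    # (rl_config.lr_ext_scheduler / rl_config.lr_int_scheduler), see <LRScheduler>.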

target_model_update_interval: int = 1500

Synchronization interval for the target network

enable_double_dqn: bool = True

Enable Double DQN

enable_rescale: bool = False

Enable value rescaling
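
These options control the DQN-side behavior; for example:

    rl_config = agent57_light.Config(
        target_model_update_interval=1500,  # how often the target network is synchronized
        enable_double_dqn=True,             # use the Double DQN target
        enable_rescale=False,               # value rescaling disabled
    )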

input_value_block: InputValueBlockConfig

<InputValueBlock>

input_image_block: InputImageBlockConfig

<InputImageBlock>

hidden_block: DuelingNetworkConfig

<DuelingNetwork> hidden layer
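
The input blocks and the hidden Dueling Network block are customized through their own nested configs. A sketch with assumed setter names (the actual methods are listed in the linked block references):

    rl_config = agent57_light.Config()
    # Assumed setters, see <InputImageBlock> / <DuelingNetwork>:
    rl_config.input_image_block.set_dqn_block()  # assumption: DQN-style CNN for image inputs
    rl_config.hidden_block.set((512,))           # assumption: one 512-unit hidden layer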

actor_num: int = 32

Number of actors (reference UCB settings: window=160, ε=0.5 or window=3600, ε=0.01)

ucb_window_size: int = 3600

Upper limit of the UCB window size

ucb_epsilon: float = 0.01

Probability of using UCB

ucb_beta: float = 1

UCB β coefficient
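
These values configure the per-episode meta-controller that selects one of the actor_num exploration policies; for example:

    rl_config = agent57_light.Config(
        actor_num=32,          # number of exploration policies
        ucb_window_size=3600,  # sliding-window size of the UCB bandit
        ucb_epsilon=0.01,
        ucb_beta=1.0,
    )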

enable_intrinsic_reward: bool = True

Enable the intrinsic reward

episodic_lr: float = 0.0005

Episodic module learning rate

episodic_lr_scheduler: LRSchedulerConfig

<LRScheduler>

episodic_count_max: int = 10

[episodic] k (number of nearest neighbors)

episodic_epsilon: float = 0.001

[episodic] epsilon

episodic_cluster_distance: float = 0.008

[episodic] cluster_distance

episodic_memory_capacity: int = 30000

[episodic] capacity

episodic_pseudo_counts: float = 0.1

[episodic] pseudo-count constant (c)
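
How these values enter the episodic intrinsic reward can be sketched from the NGU/Agent57 papers; this is an illustrative simplification, not the library's internal code (the papers additionally zero the reward when the similarity term grows too large):

    import numpy as np

    def episodic_reward(dists_sq, dists_sq_running_mean,
                        k=10, epsilon=0.001, cluster_distance=0.008, c=0.1):
        """dists_sq: squared distances to the k (episodic_count_max) nearest embeddings."""
        d = np.sort(np.asarray(dists_sq))[:k] / max(dists_sq_running_mean, 1e-8)
        d = np.maximum(d - cluster_distance, 0.0)   # episodic_cluster_distance
        kernel = epsilon / (d + epsilon)            # episodic_epsilon
        return 1.0 / (np.sqrt(kernel.sum()) + c)    # episodic_pseudo_counts (c)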

episodic_emb_block: MLPBlockConfig

<MLPBlock> [episodic] emb block

episodic_out_block: MLPBlockConfig

<MLPBlock> [episodic] out block

lifelong_lr: float = 0.0005

Lifelong module learning rate

lifelong_lr_scheduler: LRSchedulerConfig

<LRScheduler>

lifelong_max: float = 5.0

[lifelong] L (upper clip of the novelty multiplier)

lifelong_hidden_block: MLPBlockConfig

<MLPBlock> [lifelong] hidden block
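
Following the NGU formulation, the lifelong (RND-style) prediction error scales the episodic reward through a multiplier clipped to [1, lifelong_max]; an illustrative sketch, not the library's internal code:

    import numpy as np

    def intrinsic_reward(r_episodic, rnd_error, err_mean, err_std, L=5.0):
        alpha = 1.0 + (rnd_error - err_mean) / max(err_std, 1e-8)  # lifelong novelty
        return r_episodic * float(np.clip(alpha, 1.0, L))          # L = lifelong_max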

input_ext_reward: bool = True

[UVFA] include the extrinsic reward in the input

input_int_reward: bool = False

[UVFA] include the intrinsic reward in the input

input_action: bool = False

[UVFA] include the action in the input
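
These flags choose which extra signals are appended to the Q-network input (UVFA-style conditioning); for example:

    rl_config = agent57_light.Config(
        input_ext_reward=True,   # include the extrinsic reward in the network input
        input_int_reward=False,  # include the intrinsic reward in the network input
        input_action=False,      # include the action in the network input
    )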

disable_int_priority: bool = False

Do not use the intrinsic reward when computing priorities

dummy_state_val: float = 0.0

Fill value used for dummy states