Agent57 light

class srl.algorithms.agent57_light.agent57_light.Config(framework: str = 'auto', batch_size: int = 32, memory_capacity: int = 100000, memory_warmup_size: int = 1000, memory_compress: bool = True, memory_compress_level: int = -1, observation_mode: str | ~srl.base.define.ObservationModes = ObservationModes.ENV, override_observation_type: ~srl.base.define.SpaceTypes = SpaceTypes.UNKNOWN, override_action_type: str | ~srl.base.define.RLBaseActTypes = <RLBaseActTypes.NONE: 1>, action_division_num: int = 10, observation_division_num: int = 1000, frameskip: int = 0, extend_worker: ~typing.Type[ExtendWorker] | None = None, parameter_path: str = '', memory_path: str = '', use_rl_processor: bool = True, processors: ~typing.List[RLProcessor] = <factory>, render_image_processors: ~typing.List[RLProcessor] = <factory>, enable_state_encode: bool = True, enable_action_decode: bool = True, enable_reward_encode: bool = True, enable_done_encode: bool = True, window_length: int = 1, render_image_window_length: int = 1, enable_sanitize: bool = True, enable_assertion: bool = False, test_epsilon: float = 0, test_beta: float = 0, lr_ext: float | ~srl.rl.schedulers.scheduler.SchedulerConfig = 0.0001, lr_int: float | ~srl.rl.schedulers.scheduler.SchedulerConfig = 0.0001, target_model_update_interval: int = 1500, enable_double_dqn: bool = True, enable_rescale: bool = False, actor_num: int = 32, ucb_window_size: int = 3600, ucb_epsilon: float = 0.01, ucb_beta: float = 1, enable_intrinsic_reward: bool = True, episodic_lr: float | ~srl.rl.schedulers.scheduler.SchedulerConfig = 0.0005, episodic_count_max: int = 10, episodic_epsilon: float = 0.001, episodic_cluster_distance: float = 0.008, episodic_memory_capacity: int = 30000, episodic_pseudo_counts: float = 0.1, lifelong_lr: float | ~srl.rl.schedulers.scheduler.SchedulerConfig = 0.0005, lifelong_max: float = 5.0, input_ext_reward: bool = True, input_int_reward: bool = False, input_action: bool = False, disable_int_priority: bool = False, dummy_state_val: float = 0.0)

<PriorityExperienceReplay> <RLConfigComponentFramework> <RLConfigComponentInput>
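A minimal usage sketch (assuming the library's usual Runner workflow and its built-in "Grid" sample environment; exact arguments may differ between versions):

    import srl
    from srl.algorithms import agent57_light

    rl_config = agent57_light.Config()
    rl_config.batch_size = 64  # any field listed below can be overridden before training

    runner = srl.Runner("Grid", rl_config)  # "Grid" is a sample environment, used here for illustration
    runner.train(timeout=30)
    print(runner.evaluate())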

test_epsilon: float = 0

ε-greedy parameter used at test (evaluation) time

test_beta: float = 0

Intrinsic reward rate (β) used at test time

lr_ext: float | SchedulerConfig = 0.0001

<Scheduler> Learning rate for the extrinsic Q-network

lr_int: float | SchedulerConfig = 0.0001

<Scheduler> Learning rate for the intrinsic Q-network
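Both learning rates accept either a plain float or a SchedulerConfig. A small sketch setting constant rates (assigning a SchedulerConfig instead allows a decaying schedule; see srl.rl.schedulers.scheduler.SchedulerConfig):

    from srl.algorithms import agent57_light

    rl_config = agent57_light.Config()
    rl_config.lr_ext = 0.0002  # extrinsic Q-network learning rate
    rl_config.lr_int = 0.0001  # intrinsic Q-network learning rate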

target_model_update_interval: int = 1500

Synchronization interval for the target network

enable_double_dqn: bool = True

Enable Double DQN

enable_rescale: bool = False

Enable value rescaling
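For reference, the rescaling used in R2D2/Agent57-style agents is an invertible transform applied to value targets; a sketch of that transform and its inverse (assuming this is the function enable_rescale toggles):

    import numpy as np

    def rescale(x, eps=1e-3):
        # h(x) = sign(x) * (sqrt(|x| + 1) - 1) + eps * x
        return np.sign(x) * (np.sqrt(np.abs(x) + 1.0) - 1.0) + eps * x

    def inverse_rescale(x, eps=1e-3):
        # Closed-form inverse of h, applied to Q values before the Bellman backup.
        n = np.sqrt(1.0 + 4.0 * eps * (np.abs(x) + 1.0 + eps)) - 1.0
        return np.sign(x) * (((n / (2.0 * eps)) ** 2) - 1.0)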

hidden_block: DuelingNetworkConfig

<DuelingNetwork> Hidden layer block

actor_num: int = 32

Number of actors (exploration policies). Reference UCB settings: (ucb_window_size, ucb_epsilon) = (160, 0.5) or (3600, 0.01)

ucb_window_size: int = 3600

UCB window size (upper limit on stored episode results)

ucb_epsilon: float = 0.01

ε used in the ε-greedy part of UCB actor selection

ucb_beta: float = 1

β coefficient of the UCB exploration bonus
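actor_num, ucb_window_size, ucb_epsilon and ucb_beta drive a sliding-window UCB bandit that picks which actor (exploration policy) to run each episode, based on recent episode rewards. A rough sketch of that selection rule (the class name and details such as reward normalization are illustrative, not the library's exact implementation):

    import math
    import random
    from collections import deque

    class ActorSelector:
        def __init__(self, actor_num=32, window_size=3600, epsilon=0.01, beta=1.0):
            self.actor_num = actor_num
            self.epsilon = epsilon
            self.beta = beta
            self.history = deque(maxlen=window_size)  # recent (actor, episode_reward) pairs

        def select(self):
            # Occasionally (or before enough data is collected) pick an actor at random.
            if len(self.history) < self.actor_num or random.random() < self.epsilon:
                return random.randrange(self.actor_num)
            totals = [0.0] * self.actor_num
            counts = [0] * self.actor_num
            for actor, reward in self.history:
                totals[actor] += reward
                counts[actor] += 1
            n = len(self.history)

            def ucb(a):
                if counts[a] == 0:
                    return float("inf")
                return totals[a] / counts[a] + self.beta * math.sqrt(math.log(n) / counts[a])

            return max(range(self.actor_num), key=ucb)

        def update(self, actor, episode_reward):
            self.history.append((actor, episode_reward))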

enable_intrinsic_reward: bool = True

Enable intrinsic reward

episodic_lr: float | SchedulerConfig = 0.0005

<Scheduler> Learning rate for the episodic (embedding) network

episodic_count_max: int = 10

[episodic] k, the number of nearest neighbors used for the pseudo-count

episodic_epsilon: float = 0.001

[episodic] ε, small constant in the similarity kernel

episodic_cluster_distance: float = 0.008

[episodic] cluster distance (neighbor distances below this value are clamped to zero)

episodic_memory_capacity: int = 30000

[episodic] episodic memory capacity

episodic_pseudo_counts: float = 0.1

[episodic] pseudo-count constant (c)

episodic_emb_block: MLPBlockConfig

<MLPBlock> [episodic] embedding network block

episodic_out_block: MLPBlockConfig

<MLPBlock> [episodic] output network block
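The episodic parameters feed an NGU-style pseudo-count bonus computed from the k nearest embeddings in the episodic memory. A simplified sketch of that computation (function and variable names are illustrative; the actual implementation also keeps a running mean of neighbor distances across steps):

    import numpy as np

    def episodic_bonus(embedding, memory, k=10, epsilon=0.001,
                       cluster_distance=0.008, pseudo_counts=0.1):
        # Approximates 1 / sqrt(N(s)) from the k nearest neighbors in episodic memory.
        if len(memory) == 0:
            return 1.0
        dists = np.sort(np.array([np.sum((embedding - m) ** 2) for m in memory]))[:k]
        dists = dists / max(float(np.mean(dists)), 1e-8)   # normalize by the mean distance
        dists = np.maximum(dists - cluster_distance, 0.0)  # ignore near-duplicate states
        kernel = epsilon / (dists + epsilon)               # inverse-distance similarity kernel
        return 1.0 / (np.sqrt(np.sum(kernel)) + pseudo_counts)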

lifelong_lr: float | SchedulerConfig = 0.0005

<Scheduler> Learning rate for the lifelong (RND) network

lifelong_max: float = 5.0

[lifelong] L, the upper bound on the lifelong reward multiplier

lifelong_hidden_block: MLPBlockConfig

<MLPBlock> [lifelong] hidden block
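lifelong_max corresponds to L, the cap on the lifelong novelty multiplier that scales the episodic bonus into the final intrinsic reward; schematically (a sketch, with alpha the normalized prediction error of the lifelong network):

    def intrinsic_reward(episodic_bonus, alpha, lifelong_max=5.0):
        # r_int = r_episodic * clip(alpha, 1, L)
        return episodic_bonus * min(max(alpha, 1.0), lifelong_max)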

input_ext_reward: bool = True

[UVFA] Feed the extrinsic reward into the network input

input_int_reward: bool = False

[UVFA] Feed the intrinsic reward into the network input

input_action: bool = False

[UVFA] Feed the action into the network input
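When these UVFA flags are enabled, the corresponding signals are concatenated onto the observation before it enters the Q-networks; a schematic sketch of that input assembly (ordering and shapes are illustrative, not the library's exact layout):

    import numpy as np

    def build_uvfa_input(obs, action_onehot, ext_reward, int_reward,
                         input_ext_reward=True, input_int_reward=False, input_action=False):
        parts = [np.asarray(obs, dtype=np.float32).ravel()]
        if input_ext_reward:
            parts.append(np.array([ext_reward], dtype=np.float32))
        if input_int_reward:
            parts.append(np.array([int_reward], dtype=np.float32))
        if input_action:
            parts.append(np.asarray(action_onehot, dtype=np.float32))
        return np.concatenate(parts)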

disable_int_priority: bool = False

Do not use intrinsic rewards when calculating priorities

dummy_state_val: float = 0.0

Fill value used for dummy states