SND (Self-supervised Network Distillation)

class srl.algorithms.snd.Config(observation_mode: str | ~srl.base.define.ObservationModes = ObservationModes.ENV, override_observation_type: ~srl.base.define.SpaceTypes = SpaceTypes.UNKNOWN, override_action_type: str | ~srl.base.define.RLBaseActTypes = <RLBaseActTypes.NONE: 1>, action_division_num: int = 10, observation_division_num: int = 1000, frameskip: int = 0, extend_worker: ~typing.Type[ExtendWorker] | None = None, parameter_path: str = '', memory_path: str = '', use_rl_processor: bool = True, processors: ~typing.List[RLProcessor] = <factory>, render_image_processors: ~typing.List[RLProcessor] = <factory>, enable_state_encode: bool = True, enable_action_decode: bool = True, enable_reward_encode: bool = True, enable_done_encode: bool = True, window_length: int = 1, render_image_window_length: int = 1, enable_sanitize: bool = True, enable_assertion: bool = False, test_epsilon: float = 0, epsilon: float = 0.001, lr: float | ~srl.rl.schedulers.scheduler.SchedulerConfig = 0.001, batch_size: int = 32, memory_capacity: int = 100000, memory_warmup_size: int = 1000, discount: float = 0.99, target_model_update_interval: int = 1000, int_reward_scale: float = 0.5)

<RLConfigComponentInput>

test_epsilon: float = 0

ε-greedy parameter for Test

epsilon: float = 0.001

ε-greedy parameter for Train

lr: float | SchedulerConfig = 0.001

<Scheduler> Learning rate

batch_size: int = 32

Batch size

memory_capacity: int = 100000

Memory capacity (maximum number of samples stored in the memory)

memory_warmup_size: int = 1000

Memory warmup size (number of samples to accumulate in the memory before training starts)

discount: float = 0.99

Discount rate

target_model_update_interval: int = 1000

Synchronization interval for the target network

int_reward_scale: float = 0.5

Scale factor applied to the intrinsic reward

hidden_block: MLPBlockConfig

<MLPBlock> hidden layer