NevarokML: UNevarokMLBaseAlgorithm API
The UNevarokMLBaseAlgorithm class represents a base algorithm for reinforcement learning in NevarokML.
Properties
- `_algorithm` (`ENevarokMLAlgorithm`): The type of algorithm.
- `_policy` (`ENevarokMLPolicy`): The policy used by the algorithm.
- `_learningRate` (`float`): The learning rate for the algorithm.
- `_nSteps` (`int32`): The number of steps per batch.
- `_batchSize` (`int32`): The batch size.
- `_nEpochs` (`int32`): The number of training epochs.
- `_gamma` (`float`): The discount factor for future rewards.
- `_entCoef` (`float`): The coefficient for the entropy bonus.
- `_vfCoef` (`float`): The coefficient for the value function loss.
- `_clipRange` (`float`): The clipping range for the policy loss.
- `_maxGradNorm` (`float`): The maximum gradient norm for gradient clipping.
- `_verbose` (`int`): The verbosity level for logging.
- `_gaeLambda` (`float`): The lambda parameter for generalized advantage estimation.
- `_useSde` (`bool`): Whether to use state-dependent exploration.
- `_sdeSampleFreq` (`int`): The frequency of sampling for state-dependent exploration.
- `_rmsPropEps` (`float`): The epsilon value for the RMSprop optimizer.
- `_useRmsProp` (`bool`): Whether to use the RMSprop optimizer.
- `_normalizeAdvantage` (`bool`): Whether to normalize advantages.
- `_bufferSize` (`int`): The size of the replay buffer.
- `_learningStarts` (`int`): The number of steps before starting to learn.
- `_tau` (`float`): The soft update coefficient for target networks.
- `_gradientSteps` (`int`): The number of gradient steps per update.
- `_optimizeMemoryUsage` (`bool`): Whether to optimize memory usage.
- `_targetUpdateInterval` (`int`): The interval for updating target networks.
- `_explorationFraction` (`float`): The fraction of exploration during training.
- `_explorationInitialEps` (`float`): The initial value for exploration epsilon.
- `_explorationFinalEps` (`float`): The final value for exploration epsilon.
- `_useSdeAtWarmup` (`bool`): Whether to use state-dependent exploration during the warm-up phase.
- `_policyDelay` (`int`): The number of steps to delay policy updates.
- `_targetPolicyNoise` (`float`): The noise added to the target policy for the TD3 algorithm.
- `_targetNoiseClip` (`float`): The range of noise for the target policy for the TD3 algorithm.
- `_trainFreq` (`int`): The frequency of training steps.
- `_entCoefAuto` (`bool`): Whether to automatically adjust the entropy coefficient.
- `_targetEntropyAuto` (`bool`): Whether to automatically adjust the target entropy.
- `_targetEntropy` (`float`): The target entropy for the SAC algorithm.
Methods
PPO
/**
 * Static PPO factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner          NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy         The policy used by the algorithm.
 * @param learningRate   The learning rate for the algorithm.
 * @param nSteps         The number of steps per batch.
 * @param batchSize      The batch size.
 * @param nEpochs        The number of training epochs.
 * @param gamma          The discount factor for future rewards.
 * @param gaeLambda      The lambda parameter for generalized advantage estimation.
 * @param clipRange      The clipping range for the policy loss.
 * @param entCoef        The coefficient for the entropy bonus.
 * @param vfCoef         The coefficient for the value function loss.
 * @param maxGradNorm    The maximum gradient norm for gradient clipping.
 * @param useSde         Whether to use state-dependent exploration.
 * @param sdeSampleFreq  The frequency of sampling for state-dependent exploration.
 * @param verbose        The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* PPO(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 3e-4, const int nSteps = 2048,
const int batchSize = 64, const int nEpochs = 10, const float gamma = 0.99,
const float gaeLambda = 0.95, const float clipRange = 0.2,
const float entCoef = 0.0, const float vfCoef = 0.5,
const float maxGradNorm = 0.5, const bool useSde = false,
const int sdeSampleFreq = -1, const int verbose = 1);
A2C
/**
 * Static A2C factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner               NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy              The policy used by the algorithm.
 * @param learningRate        The learning rate for the algorithm.
 * @param nSteps              The number of steps per batch.
 * @param gamma               The discount factor for future rewards.
 * @param gaeLambda           The lambda parameter for generalized advantage estimation.
 * @param entCoef             The coefficient for the entropy bonus.
 * @param vfCoef              The coefficient for the value function loss.
 * @param maxGradNorm         The maximum gradient norm for gradient clipping.
 * @param rmsPropEps          The epsilon value for the RMSprop optimizer.
 * @param useRmsProp          Whether to use the RMSprop optimizer.
 * @param useSde              Whether to use state-dependent exploration.
 * @param sdeSampleFreq       The frequency of sampling for state-dependent exploration.
 * @param normalizeAdvantage  Whether to normalize advantages.
 * @param verbose             The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* A2C(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 7e-4, const int nSteps = 5,
const float gamma = 0.99, const float gaeLambda = 1.0,
const float entCoef = 0.0, const float vfCoef = 0.5,
const float maxGradNorm = 0.5, const float rmsPropEps = 1e-5,
const bool useRmsProp = true, const bool useSde = false,
const int sdeSampleFreq = -1, const bool normalizeAdvantage = false,
const int verbose = 1);
DDPG
/**
 * Static DDPG factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner                NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy               The policy used by the algorithm.
 * @param learningRate         The learning rate for the algorithm.
 * @param bufferSize           The size of the replay buffer.
 * @param learningStarts       The number of steps before starting to learn.
 * @param batchSize            The batch size.
 * @param tau                  The soft update coefficient for target networks.
 * @param gamma                The discount factor for future rewards.
 * @param trainFreq            The frequency of training steps.
 * @param gradientSteps        The number of gradient steps per update.
 * @param optimizeMemoryUsage  Whether to optimize memory usage.
 * @param verbose              The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* DDPG(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-3, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 100,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = -1,
const bool optimizeMemoryUsage = false, const int verbose = 1);
DQN
/**
 * Static DQN factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner                  NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy                 The policy used by the algorithm.
 * @param learningRate           The learning rate for the algorithm.
 * @param bufferSize             The size of the replay buffer.
 * @param learningStarts         The number of steps before starting to learn.
 * @param batchSize              The batch size.
 * @param tau                    The soft update coefficient for target networks.
 * @param gamma                  The discount factor for future rewards.
 * @param trainFreq              The frequency of training steps.
 * @param gradientSteps          The number of gradient steps per update.
 * @param optimizeMemoryUsage    Whether to optimize memory usage.
 * @param targetUpdateInterval   The interval for updating target networks.
 * @param explorationFraction    The fraction of exploration during training.
 * @param explorationInitialEps  The initial value for exploration epsilon.
 * @param explorationFinalEps    The final value for exploration epsilon.
 * @param maxGradNorm            The maximum gradient norm for gradient clipping.
 * @param verbose                The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* DQN(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-4, const int bufferSize = 1000000,
const int learningStarts = 50000, const int batchSize = 32,
const float tau = 1.0, const float gamma = 0.99,
const int trainFreq = 4, const int gradientSteps = 1,
const bool optimizeMemoryUsage = false,
const int targetUpdateInterval = 10000,
const float explorationFraction = 0.1,
const float explorationInitialEps = 1.0,
const float explorationFinalEps = 0.05,
const float maxGradNorm = 10, const int verbose = 1);
SAC
/**
 * Static SAC factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner                 NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy                The policy used by the algorithm.
 * @param learningRate          The learning rate for the algorithm.
 * @param bufferSize            The size of the replay buffer.
 * @param learningStarts        The number of steps before starting to learn.
 * @param batchSize             The batch size.
 * @param tau                   The soft update coefficient for target networks.
 * @param gamma                 The discount factor for future rewards.
 * @param trainFreq             The frequency of training steps.
 * @param gradientSteps         The number of gradient steps per update.
 * @param optimizeMemoryUsage   Whether to optimize memory usage.
 * @param entCoefAuto           Whether to automatically adjust the entropy coefficient.
 * @param entCoef               The coefficient for the entropy bonus.
 * @param targetUpdateInterval  The interval for updating target networks.
 * @param targetEntropyAuto     Whether to automatically adjust the target entropy.
 * @param targetEntropy         The target entropy for the SAC algorithm.
 * @param useSde                Whether to use state-dependent exploration.
 * @param sdeSampleFreq         The frequency of sampling for state-dependent exploration.
 * @param useSdeAtWarmup        Whether to use state-dependent exploration during the warm-up phase.
 * @param verbose               The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* SAC(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 3e-4, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 256,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = 1,
const bool optimizeMemoryUsage = false, const bool entCoefAuto = true,
const float entCoef = 0.0, const int targetUpdateInterval = 1,
const bool targetEntropyAuto = true, const float targetEntropy = 0.0,
const bool useSde = false, const int sdeSampleFreq = -1,
const bool useSdeAtWarmup = false, const int verbose = 1);
TD3
/**
 * Static TD3 factory declaration, exposed to Blueprints as a pure function in
 * the "NevarokML|BaseAlgorithm" category. Only the declaration is shown here;
 * parameter meanings below follow the matching `_name` class properties
 * (presumably each argument is stored in that property - TODO confirm).
 *
 * @param owner                NOTE(review): presumably the UObject that owns the returned instance - confirm.
 * @param policy               The policy used by the algorithm.
 * @param learningRate         The learning rate for the algorithm.
 * @param bufferSize           The size of the replay buffer.
 * @param learningStarts       The number of steps before starting to learn.
 * @param batchSize            The batch size.
 * @param tau                  The soft update coefficient for target networks.
 * @param gamma                The discount factor for future rewards.
 * @param trainFreq            The frequency of training steps.
 * @param gradientSteps        The number of gradient steps per update.
 * @param optimizeMemoryUsage  Whether to optimize memory usage.
 * @param policyDelay          The number of steps to delay policy updates.
 * @param targetPolicyNoise    The noise added to the target policy for the TD3 algorithm.
 * @param targetNoiseClip      The range of noise for the target policy for the TD3 algorithm.
 * @param verbose              The verbosity level for logging.
 * @return Pointer to a UNevarokMLBaseAlgorithm.
 */
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* TD3(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-3, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 100,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = -1,
const bool optimizeMemoryUsage = false, const int policyDelay = 2,
const float targetPolicyNoise = 0.2,
const float targetNoiseClip = 0.5, const int verbose = 1);