NevarokML: UNevarokMLBaseAlgorithm API
The UNevarokMLBaseAlgorithm class represents a base algorithm for reinforcement learning in NevarokML.
Properties
- _algorithm (ENevarokMLAlgorithm): The type of algorithm.
- _policy (ENevarokMLPolicy): The policy used by the algorithm.
- _learningRate (float): The learning rate for the algorithm.
- _nSteps (int32): The number of steps per batch.
- _batchSize (int32): The batch size.
- _nEpochs (int32): The number of training epochs.
- _gamma (float): The discount factor for future rewards.
- _entCoef (float): The coefficient for the entropy bonus.
- _vfCoef (float): The coefficient for the value function loss.
- _clipRange (float): The clipping range for the policy loss.
- _maxGradNorm (float): The maximum gradient norm for gradient clipping.
- _verbose (int): The verbosity level for logging.
- _gaeLambda (float): The lambda parameter for generalized advantage estimation.
- _useSde (bool): Whether to use state-dependent exploration.
- _sdeSampleFreq (int): The frequency of sampling for state-dependent exploration.
- _rmsPropEps (float): The epsilon value for the RMSprop optimizer.
- _useRmsProp (bool): Whether to use the RMSprop optimizer.
- _normalizeAdvantage (bool): Whether to normalize advantages.
- _bufferSize (int): The size of the replay buffer.
- _learningStarts (int): The number of steps before starting to learn.
- _tau (float): The soft update coefficient for target networks.
- _gradientSteps (int): The number of gradient steps per update.
- _optimizeMemoryUsage (bool): Whether to optimize memory usage.
- _targetUpdateInterval (int): The interval for updating target networks.
- _explorationFraction (float): The fraction of exploration during training.
- _explorationInitialEps (float): The initial value for exploration epsilon.
- _explorationFinalEps (float): The final value for exploration epsilon.
- _useSdeAtWarmup (bool): Whether to use state-dependent exploration during the warm-up phase.
- _policyDelay (int): The number of steps to delay policy updates.
- _targetPolicyNoise (float): The noise added to the target policy for the TD3 algorithm.
- _targetNoiseClip (float): The range of noise for the target policy for the TD3 algorithm.
- _trainFreq (int): The frequency of training steps.
- _entCoefAuto (bool): Whether to automatically adjust the entropy coefficient.
- _targetEntropyAuto (bool): Whether to automatically adjust the target entropy.
- _targetEntropy (float): The target entropy for the SAC algorithm.
Methods
PPO
/// Creates a PPO (Proximal Policy Optimization) algorithm configuration.
/// @param owner         Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy        Policy network type used by the algorithm.
/// @param learningRate  Learning rate for the optimizer.
/// @param nSteps        Number of steps per batch.
/// @param batchSize     Minibatch size.
/// @param nEpochs       Number of training epochs per update.
/// @param gamma         Discount factor for future rewards.
/// @param gaeLambda     Lambda parameter for generalized advantage estimation.
/// @param clipRange     Clipping range for the policy loss.
/// @param entCoef       Coefficient for the entropy bonus.
/// @param vfCoef        Coefficient for the value function loss.
/// @param maxGradNorm   Maximum gradient norm for gradient clipping.
/// @param useSde        Whether to use state-dependent exploration.
/// @param sdeSampleFreq Sampling frequency for state-dependent exploration (-1 presumably means sample once per rollout — confirm).
/// @param verbose       Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* PPO(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 3e-4, const int nSteps = 2048,
const int batchSize = 64, const int nEpochs = 10, const float gamma = 0.99,
const float gaeLambda = 0.95, const float clipRange = 0.2,
const float entCoef = 0.0, const float vfCoef = 0.5,
const float maxGradNorm = 0.5, const bool useSde = false,
const int sdeSampleFreq = -1, const int verbose = 1);
A2C
/// Creates an A2C (Advantage Actor-Critic) algorithm configuration.
/// @param owner              Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy             Policy network type used by the algorithm.
/// @param learningRate       Learning rate for the optimizer.
/// @param nSteps             Number of steps per batch.
/// @param gamma              Discount factor for future rewards.
/// @param gaeLambda          Lambda parameter for generalized advantage estimation.
/// @param entCoef            Coefficient for the entropy bonus.
/// @param vfCoef             Coefficient for the value function loss.
/// @param maxGradNorm        Maximum gradient norm for gradient clipping.
/// @param rmsPropEps         Epsilon value for the RMSprop optimizer.
/// @param useRmsProp         Whether to use the RMSprop optimizer.
/// @param useSde             Whether to use state-dependent exploration.
/// @param sdeSampleFreq      Sampling frequency for state-dependent exploration.
/// @param normalizeAdvantage Whether to normalize advantages.
/// @param verbose            Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* A2C(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 7e-4, const int nSteps = 5,
const float gamma = 0.99, const float gaeLambda = 1.0,
const float entCoef = 0.0, const float vfCoef = 0.5,
const float maxGradNorm = 0.5, const float rmsPropEps = 1e-5,
const bool useRmsProp = true, const bool useSde = false,
const int sdeSampleFreq = -1, const bool normalizeAdvantage = false,
const int verbose = 1);
DDPG
/// Creates a DDPG (Deep Deterministic Policy Gradient) algorithm configuration.
/// @param owner               Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy              Policy network type used by the algorithm.
/// @param learningRate        Learning rate for the optimizer.
/// @param bufferSize          Size of the replay buffer.
/// @param learningStarts      Number of steps collected before learning starts.
/// @param batchSize           Minibatch size.
/// @param tau                 Soft update coefficient for target networks.
/// @param gamma               Discount factor for future rewards.
/// @param trainFreq           Frequency of training steps.
/// @param gradientSteps       Number of gradient steps per update (-1 presumably means as many as env steps — confirm).
/// @param optimizeMemoryUsage Whether to optimize replay-buffer memory usage.
/// @param verbose             Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* DDPG(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-3, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 100,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = -1,
const bool optimizeMemoryUsage = false, const int verbose = 1);
DQN
/// Creates a DQN (Deep Q-Network) algorithm configuration.
/// @param owner                 Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy                Policy network type used by the algorithm.
/// @param learningRate          Learning rate for the optimizer.
/// @param bufferSize            Size of the replay buffer.
/// @param learningStarts        Number of steps collected before learning starts.
/// @param batchSize             Minibatch size.
/// @param tau                   Soft update coefficient for target networks (1.0 = hard update).
/// @param gamma                 Discount factor for future rewards.
/// @param trainFreq             Frequency of training steps.
/// @param gradientSteps         Number of gradient steps per update.
/// @param optimizeMemoryUsage   Whether to optimize replay-buffer memory usage.
/// @param targetUpdateInterval  Interval for updating the target network.
/// @param explorationFraction   Fraction of training spent on exploration decay.
/// @param explorationInitialEps Initial value for exploration epsilon.
/// @param explorationFinalEps   Final value for exploration epsilon.
/// @param maxGradNorm           Maximum gradient norm for gradient clipping.
/// @param verbose               Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* DQN(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-4, const int bufferSize = 1000000,
const int learningStarts = 50000, const int batchSize = 32,
const float tau = 1.0, const float gamma = 0.99,
const int trainFreq = 4, const int gradientSteps = 1,
const bool optimizeMemoryUsage = false,
const int targetUpdateInterval = 10000,
const float explorationFraction = 0.1,
const float explorationInitialEps = 1.0,
const float explorationFinalEps = 0.05,
const float maxGradNorm = 10, const int verbose = 1);
SAC
/// Creates a SAC (Soft Actor-Critic) algorithm configuration.
/// @param owner                Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy               Policy network type used by the algorithm.
/// @param learningRate         Learning rate for the optimizer.
/// @param bufferSize           Size of the replay buffer.
/// @param learningStarts       Number of steps collected before learning starts.
/// @param batchSize            Minibatch size.
/// @param tau                  Soft update coefficient for target networks.
/// @param gamma                Discount factor for future rewards.
/// @param trainFreq            Frequency of training steps.
/// @param gradientSteps        Number of gradient steps per update.
/// @param optimizeMemoryUsage  Whether to optimize replay-buffer memory usage.
/// @param entCoefAuto          Whether to automatically adjust the entropy coefficient
///                             (when true, entCoef is presumably ignored — confirm).
/// @param entCoef              Coefficient for the entropy bonus.
/// @param targetUpdateInterval Interval for updating target networks.
/// @param targetEntropyAuto    Whether to automatically adjust the target entropy
///                             (when true, targetEntropy is presumably ignored — confirm).
/// @param targetEntropy        Target entropy for the SAC algorithm.
/// @param useSde               Whether to use state-dependent exploration.
/// @param sdeSampleFreq        Sampling frequency for state-dependent exploration.
/// @param useSdeAtWarmup       Whether to use state-dependent exploration during the warm-up phase.
/// @param verbose              Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* SAC(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 3e-4, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 256,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = 1,
const bool optimizeMemoryUsage = false, const bool entCoefAuto = true,
const float entCoef = 0.0, const int targetUpdateInterval = 1,
const bool targetEntropyAuto = true, const float targetEntropy = 0.0,
const bool useSde = false, const int sdeSampleFreq = -1,
const bool useSdeAtWarmup = false, const int verbose = 1);
TD3
/// Creates a TD3 (Twin Delayed Deep Deterministic Policy Gradient) algorithm configuration.
/// @param owner               Owning UObject for the created algorithm (presumably the Unreal "outer" — confirm).
/// @param policy              Policy network type used by the algorithm.
/// @param learningRate        Learning rate for the optimizer.
/// @param bufferSize          Size of the replay buffer.
/// @param learningStarts      Number of steps collected before learning starts.
/// @param batchSize           Minibatch size.
/// @param tau                 Soft update coefficient for target networks.
/// @param gamma               Discount factor for future rewards.
/// @param trainFreq           Frequency of training steps.
/// @param gradientSteps       Number of gradient steps per update (-1 presumably means as many as env steps — confirm).
/// @param optimizeMemoryUsage Whether to optimize replay-buffer memory usage.
/// @param policyDelay         Number of steps to delay policy updates.
/// @param targetPolicyNoise   Noise added to the target policy.
/// @param targetNoiseClip     Range of noise for the target policy.
/// @param verbose             Verbosity level for logging.
/// @return The configured algorithm object.
UFUNCTION(BlueprintPure, Category = "NevarokML|BaseAlgorithm")
static UNevarokMLBaseAlgorithm* TD3(UObject* owner, const ENevarokMLPolicy policy = ENevarokMLPolicy::MLP_POLICY,
const float learningRate = 1e-3, const int bufferSize = 1000000,
const int learningStarts = 100, const int batchSize = 100,
const float tau = 0.005, const float gamma = 0.99,
const int trainFreq = 1, const int gradientSteps = -1,
const bool optimizeMemoryUsage = false, const int policyDelay = 2,
const float targetPolicyNoise = 0.2,
const float targetNoiseClip = 0.5, const int verbose = 1);