mirror of https://github.com/inclusionAI/AReaL
This commit is contained in:
parent d8bd161b1d · commit 8208f986b9
```diff
@@ -85,62 +85,6 @@ class GenerationHyperparameters:
 # Train Engine Configs
 
 
-@dataclass
-class OptimizerConfig:
-    """Configuration for model optimization during training.
-
-    Note:
-        Set type to "empty" for models that won't be trained.
-    """
-
-    type: str = field(
-        default="adam",
-        metadata={"help": "Optimizer type", "choices": ["adam", "empty"]},
-    )
-    lr: float = field(default=2e-5, metadata={"help": "Learning rate"})
-    weight_decay: float = field(default=0.05, metadata={"help": "Weight decay"})
-    beta1: float = field(default=0.9, metadata={"help": "Adam beta1 parameter"})
-    beta2: float = field(default=0.95, metadata={"help": "Adam beta2 parameter"})
-    eps: float = field(default=1e-5, metadata={"help": "Adam epsilon parameter"})
-    min_lr_ratio: float = field(
-        default=0.0,
-        metadata={
-            "help": "Minimum learning rate ratio after annealing",
-        },
-    )
-    lr_scheduler_type: str = field(
-        default="constant",
-        metadata={
-            "help": "Learning rate scheduler type",
-            "choices": ["linear", "cosine", "constant"],
-        },
-    )
-    warmup_steps_proportion: float = field(
-        default=0.001,
-        metadata={
-            "help": "Proportion of training steps for warmup",
-        },
-    )
-    offload: bool = field(
-        default=False, metadata={"help": "Enable optimizer state offloading"}
-    )
-    initial_loss_scale: float = field(
-        default=2**32, metadata={"help": "Initial loss scaling factor"}
-    )
-    min_loss_scale: float = field(
-        default=1.0, metadata={"help": "Minimum loss scaling factor"}
-    )
-    loss_scale_window: float = field(
-        default=5, metadata={"help": "Window size for loss scaling adjustment"}
-    )
-    hysteresis: int = field(
-        default=2, metadata={"help": "Hysteresis (scaling factor) for loss scaling"}
-    )
-    gradient_clipping: float = field(
-        default=1.0, metadata={"help": "Gradient clipping threshold"}
-    )
-
-
 @dataclass
 class OptimizerConfig:
     """Configuration for model optimization during training.
```
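The hunk deletes what appears to be a duplicated `OptimizerConfig` definition: an identical `@dataclass class OptimizerConfig` header survives as trailing context. For orientation, here is a minimal usage sketch of this config. The import path is an assumption (the diff shows only the class body, not its module); field names and defaults come straight from the hunk.

```python
from dataclasses import asdict

# Assumed import path -- the diff does not show which module hosts the class.
from areal.api.cli_args import OptimizerConfig

# Override the schedule while keeping the Adam defaults (beta1=0.9, beta2=0.95).
opt = OptimizerConfig(
    lr=1e-5,
    lr_scheduler_type="cosine",    # choices: "linear", "cosine", "constant"
    min_lr_ratio=0.1,              # anneal down to 10% of the peak lr
    warmup_steps_proportion=0.03,  # spend 3% of training steps warming up
)

# Per the docstring, a model that won't be trained (e.g., a frozen reference
# policy) should use type="empty" so no optimizer state is allocated for it.
ref_opt = OptimizerConfig(type="empty")

print(asdict(opt)["lr_scheduler_type"])  # -> "cosine"
```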
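The loss-scale fields (`initial_loss_scale`, `min_loss_scale`, `loss_scale_window`, `hysteresis`) mirror the knobs of Megatron-style dynamic loss scaling for mixed-precision training. Below is a sketch of the update rule such scalers typically implement with these parameters; it is illustrative only, not AReaL's actual implementation.

```python
class DynamicLossScaler:
    """Illustrative Megatron-style dynamic loss scaler (not AReaL's code)."""

    def __init__(self, cfg: "OptimizerConfig"):
        self.scale = cfg.initial_loss_scale  # start very high, e.g. 2**32
        self.min_scale = cfg.min_loss_scale  # never shrink below this
        self.window = cfg.loss_scale_window  # overflow-free steps before growing
        self.hysteresis = cfg.hysteresis     # tolerated consecutive overflows
        self._overflows_left = cfg.hysteresis
        self._good_steps = 0

    def update(self, found_overflow: bool) -> None:
        if found_overflow:
            self._good_steps = 0
            self._overflows_left -= 1
            if self._overflows_left <= 0:
                # Too many consecutive overflows: halve the scale and reset.
                self.scale = max(self.scale / 2.0, self.min_scale)
                self._overflows_left = self.hysteresis
        else:
            self._overflows_left = self.hysteresis
            self._good_steps += 1
            if self._good_steps >= self.window:
                # A full window without overflow: try a larger scale again.
                self.scale *= 2.0
                self._good_steps = 0
```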