{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2888086642599278, "grad_norm": 0.694216302872654, "learning_rate": 1.7777777777777777e-05, "loss": 0.6462, "step": 5 }, { "epoch": 0.5776173285198556, "grad_norm": 0.837614155906322, "learning_rate": 4e-05, "loss": 0.6855, "step": 10 }, { "epoch": 0.8664259927797834, "grad_norm": 0.7964079287374712, "learning_rate": 3.9625106212547696e-05, "loss": 0.7169, "step": 15 }, { "epoch": 1.1155234657039712, "grad_norm": 0.9598295257498393, "learning_rate": 3.8514479385377813e-05, "loss": 0.637, "step": 20 }, { "epoch": 1.404332129963899, "grad_norm": 0.7944514703083934, "learning_rate": 3.6709756228258735e-05, "loss": 0.4803, "step": 25 }, { "epoch": 1.6931407942238268, "grad_norm": 0.6518082836692719, "learning_rate": 3.4278594691157985e-05, "loss": 0.4857, "step": 30 }, { "epoch": 1.9819494584837545, "grad_norm": 0.6886973647996735, "learning_rate": 3.1312137509730776e-05, "loss": 0.4416, "step": 35 }, { "epoch": 2.2310469314079424, "grad_norm": 0.6635042415823782, "learning_rate": 2.792159532078314e-05, "loss": 0.3906, "step": 40 }, { "epoch": 2.51985559566787, "grad_norm": 0.6315628131041415, "learning_rate": 2.423407744458822e-05, "loss": 0.3395, "step": 45 }, { "epoch": 2.808664259927798, "grad_norm": 0.5243150154226285, "learning_rate": 2.038782663543649e-05, "loss": 0.322, "step": 50 }, { "epoch": 3.0577617328519855, "grad_norm": 0.9433727073495656, "learning_rate": 1.6527036446661396e-05, "loss": 0.2934, "step": 55 }, { "epoch": 3.3465703971119134, "grad_norm": 0.4645384807088082, "learning_rate": 1.2796445503905797e-05, "loss": 0.2552, "step": 60 }, { "epoch": 3.6353790613718413, "grad_norm": 0.3922743659029623, "learning_rate": 9.33591134396618e-06, "loss": 0.2543, "step": 65 }, { "epoch": 3.9241877256317688, "grad_norm": 0.3518437547019525, "learning_rate": 6.275167242625331e-06, "loss": 0.2196, "step": 70 }, { "epoch": 4.1732851985559565, "grad_norm": 0.33805015628047197, "learning_rate": 3.7289585947406504e-06, "loss": 0.2177, "step": 75 }, { "epoch": 4.462093862815885, "grad_norm": 0.27964209039699955, "learning_rate": 1.792741180677069e-06, "loss": 0.2231, "step": 80 }, { "epoch": 4.750902527075812, "grad_norm": 0.2753850482905527, "learning_rate": 5.391025884035239e-07, "loss": 0.1894, "step": 85 }, { "epoch": 5.0, "grad_norm": 0.27245810015793187, "learning_rate": 1.5040949915399173e-08, "loss": 0.209, "step": 90 }, { "epoch": 5.0, "step": 90, "total_flos": 274293220966400.0, "train_loss": 0.3892772012286716, "train_runtime": 9724.0454, "train_samples_per_second": 1.139, "train_steps_per_second": 0.009 } ], "logging_steps": 5, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 274293220966400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }