louisguthmann's picture
Update 2B adapter with repair_v3b full run
432b303 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8205128205128205,
"eval_steps": 500,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.9341673284769059,
"epoch": 0.06837606837606838,
"grad_norm": 1.2578125,
"learning_rate": 2.7750000000000004e-05,
"loss": 1.8236064910888672,
"mean_token_accuracy": 0.6031250022351742,
"num_tokens": 81920.0,
"step": 10
},
{
"entropy": 0.5755251368507743,
"epoch": 0.13675213675213677,
"grad_norm": 0.306640625,
"learning_rate": 2.525e-05,
"loss": 0.3169667959213257,
"mean_token_accuracy": 0.9330759823322297,
"num_tokens": 163840.0,
"step": 20
},
{
"entropy": 0.03900249442085624,
"epoch": 0.20512820512820512,
"grad_norm": 0.0284423828125,
"learning_rate": 2.275e-05,
"loss": 0.0061474073678255085,
"mean_token_accuracy": 1.0,
"num_tokens": 245760.0,
"step": 30
},
{
"entropy": 0.013878341647796333,
"epoch": 0.27350427350427353,
"grad_norm": 0.01019287109375,
"learning_rate": 2.025e-05,
"loss": 0.001719124987721443,
"mean_token_accuracy": 1.0,
"num_tokens": 327680.0,
"step": 40
},
{
"entropy": 0.007600244274362922,
"epoch": 0.3418803418803419,
"grad_norm": 0.005950927734375,
"learning_rate": 1.775e-05,
"loss": 0.0008654000237584114,
"mean_token_accuracy": 1.0,
"num_tokens": 409600.0,
"step": 50
},
{
"entropy": 0.005724262841977179,
"epoch": 0.41025641025641024,
"grad_norm": 0.00482177734375,
"learning_rate": 1.525e-05,
"loss": 0.0006292078644037247,
"mean_token_accuracy": 1.0,
"num_tokens": 491520.0,
"step": 60
},
{
"entropy": 0.005162813549395651,
"epoch": 0.47863247863247865,
"grad_norm": 0.00439453125,
"learning_rate": 1.275e-05,
"loss": 0.0005607008002698422,
"mean_token_accuracy": 1.0,
"num_tokens": 573440.0,
"step": 70
},
{
"entropy": 0.004928319191094488,
"epoch": 0.5470085470085471,
"grad_norm": 0.00421142578125,
"learning_rate": 1.025e-05,
"loss": 0.0005334688816219568,
"mean_token_accuracy": 1.0,
"num_tokens": 655360.0,
"step": 80
},
{
"entropy": 0.00477353862952441,
"epoch": 0.6153846153846154,
"grad_norm": 0.004150390625,
"learning_rate": 7.75e-06,
"loss": 0.0005137024912983179,
"mean_token_accuracy": 1.0,
"num_tokens": 737280.0,
"step": 90
},
{
"entropy": 0.004704260791186243,
"epoch": 0.6837606837606838,
"grad_norm": 0.0040283203125,
"learning_rate": 5.25e-06,
"loss": 0.0005053752567619086,
"mean_token_accuracy": 1.0,
"num_tokens": 819200.0,
"step": 100
},
{
"entropy": 0.004669400909915566,
"epoch": 0.7521367521367521,
"grad_norm": 0.0040283203125,
"learning_rate": 2.75e-06,
"loss": 0.0005010279826819896,
"mean_token_accuracy": 1.0,
"num_tokens": 901120.0,
"step": 110
},
{
"entropy": 0.004655064782127738,
"epoch": 0.8205128205128205,
"grad_norm": 0.00396728515625,
"learning_rate": 2.5e-07,
"loss": 0.0004998001269996167,
"mean_token_accuracy": 1.0,
"num_tokens": 983040.0,
"step": 120
}
],
"logging_steps": 10,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 120,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8228571017379840.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}