| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.942375227311212, | |
| "eval_steps": 100000, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000942375227311212, | |
| "grad_norm": 14.25, | |
| "learning_rate": 1e-05, | |
| "loss": 0.46586317, | |
| "memory(GiB)": 64.76, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.003324 | |
| }, | |
| { | |
| "epoch": 0.00471187613655606, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 9.999648647603774e-06, | |
| "loss": 0.26192743, | |
| "memory(GiB)": 75.3, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.003362 | |
| }, | |
| { | |
| "epoch": 0.00942375227311212, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 9.998221363123425e-06, | |
| "loss": 0.10271888, | |
| "memory(GiB)": 75.3, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.003359 | |
| }, | |
| { | |
| "epoch": 0.01413562840966818, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 9.995696500215899e-06, | |
| "loss": 0.09046092, | |
| "memory(GiB)": 75.3, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.003358 | |
| }, | |
| { | |
| "epoch": 0.01884750454622424, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 9.992074613325435e-06, | |
| "loss": 0.08653282, | |
| "memory(GiB)": 75.3, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.023559380682780302, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.987356497795944e-06, | |
| "loss": 0.08451628, | |
| "memory(GiB)": 75.3, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.003358 | |
| }, | |
| { | |
| "epoch": 0.02827125681933636, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 9.981543189696349e-06, | |
| "loss": 0.0772208, | |
| "memory(GiB)": 75.3, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.003356 | |
| }, | |
| { | |
| "epoch": 0.03298313295589242, | |
| "grad_norm": 1.125, | |
| "learning_rate": 9.97463596559307e-06, | |
| "loss": 0.08322463, | |
| "memory(GiB)": 75.3, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.003356 | |
| }, | |
| { | |
| "epoch": 0.03769500909244848, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 9.966636342269706e-06, | |
| "loss": 0.07725406, | |
| "memory(GiB)": 75.3, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.003355 | |
| }, | |
| { | |
| "epoch": 0.04240688522900454, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 9.957546076393944e-06, | |
| "loss": 0.07683957, | |
| "memory(GiB)": 75.3, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.003356 | |
| }, | |
| { | |
| "epoch": 0.047118761365560605, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 9.947367164131823e-06, | |
| "loss": 0.07508552, | |
| "memory(GiB)": 75.3, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.003355 | |
| }, | |
| { | |
| "epoch": 0.05183063750211667, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 9.936101840709373e-06, | |
| "loss": 0.07236413, | |
| "memory(GiB)": 75.3, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.003353 | |
| }, | |
| { | |
| "epoch": 0.05654251363867272, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 9.923752579921787e-06, | |
| "loss": 0.07231579, | |
| "memory(GiB)": 75.3, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.06125438977522878, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 9.910322093590177e-06, | |
| "loss": 0.07145001, | |
| "memory(GiB)": 75.3, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.06596626591178484, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 9.895813330966086e-06, | |
| "loss": 0.07301619, | |
| "memory(GiB)": 75.3, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.0706781420483409, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 9.880229478083849e-06, | |
| "loss": 0.0724276, | |
| "memory(GiB)": 75.3, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.07539001818489696, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 9.863573957060953e-06, | |
| "loss": 0.06874905, | |
| "memory(GiB)": 75.3, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.08010189432145302, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 9.845850425346563e-06, | |
| "loss": 0.07212579, | |
| "memory(GiB)": 75.3, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.08481377045800909, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 9.827062774918377e-06, | |
| "loss": 0.07294501, | |
| "memory(GiB)": 75.3, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.08952564659456515, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 9.807215131427966e-06, | |
| "loss": 0.06517277, | |
| "memory(GiB)": 75.3, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.09423752273112121, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 9.786311853294799e-06, | |
| "loss": 0.06962139, | |
| "memory(GiB)": 75.3, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.09894939886767727, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 9.764357530749178e-06, | |
| "loss": 0.06724482, | |
| "memory(GiB)": 75.3, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.003339 | |
| }, | |
| { | |
| "epoch": 0.10366127500423333, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 9.741356984824234e-06, | |
| "loss": 0.06572815, | |
| "memory(GiB)": 75.3, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.003339 | |
| }, | |
| { | |
| "epoch": 0.10837315114078938, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 9.717315266297277e-06, | |
| "loss": 0.06739124, | |
| "memory(GiB)": 75.3, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.003342 | |
| }, | |
| { | |
| "epoch": 0.11308502727734544, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 9.692237654580658e-06, | |
| "loss": 0.06834027, | |
| "memory(GiB)": 75.3, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.003342 | |
| }, | |
| { | |
| "epoch": 0.1177969034139015, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.66612965656245e-06, | |
| "loss": 0.0658385, | |
| "memory(GiB)": 75.3, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.12250877955045757, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 9.638997005397174e-06, | |
| "loss": 0.0717117, | |
| "memory(GiB)": 75.3, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.12722065568701363, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 9.610845659246833e-06, | |
| "loss": 0.0667814, | |
| "memory(GiB)": 75.3, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.13193253182356968, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 9.581681799972528e-06, | |
| "loss": 0.06573244, | |
| "memory(GiB)": 75.3, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.13664440796012575, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 9.551511831776966e-06, | |
| "loss": 0.06967602, | |
| "memory(GiB)": 75.3, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.1413562840966818, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 9.520342379798141e-06, | |
| "loss": 0.06216406, | |
| "memory(GiB)": 75.3, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.14606816023323788, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 9.488180288654485e-06, | |
| "loss": 0.06460171, | |
| "memory(GiB)": 75.3, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.15078003636979392, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 9.45503262094184e-06, | |
| "loss": 0.06467786, | |
| "memory(GiB)": 75.3, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.15549191250635, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 9.420906655682553e-06, | |
| "loss": 0.06358048, | |
| "memory(GiB)": 75.3, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.16020378864290605, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 9.385809886727044e-06, | |
| "loss": 0.06778824, | |
| "memory(GiB)": 75.3, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.16491566477946212, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 9.349750021108212e-06, | |
| "loss": 0.06321884, | |
| "memory(GiB)": 75.3, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.16962754091601817, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 9.31273497734901e-06, | |
| "loss": 0.06310185, | |
| "memory(GiB)": 75.3, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.17433941705257422, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 9.274772883723587e-06, | |
| "loss": 0.06271737, | |
| "memory(GiB)": 75.3, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.1790512931891303, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 9.235872076472378e-06, | |
| "loss": 0.06393245, | |
| "memory(GiB)": 75.3, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.18376316932568634, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 9.196041097971509e-06, | |
| "loss": 0.06558744, | |
| "memory(GiB)": 75.3, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.18847504546224242, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 9.155288694856942e-06, | |
| "loss": 0.06127087, | |
| "memory(GiB)": 75.3, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.19318692159879847, | |
| "grad_norm": 0.875, | |
| "learning_rate": 9.113623816103775e-06, | |
| "loss": 0.06313071, | |
| "memory(GiB)": 75.3, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.19789879773535454, | |
| "grad_norm": 1.0, | |
| "learning_rate": 9.071055611061102e-06, | |
| "loss": 0.06330621, | |
| "memory(GiB)": 75.3, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.2026106738719106, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 9.027593427442867e-06, | |
| "loss": 0.06415906, | |
| "memory(GiB)": 75.3, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.20732255000846667, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 8.98324680927517e-06, | |
| "loss": 0.06299359, | |
| "memory(GiB)": 75.3, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.21203442614502271, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 8.938025494800454e-06, | |
| "loss": 0.06004124, | |
| "memory(GiB)": 75.3, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.003343 | |
| }, | |
| { | |
| "epoch": 0.21674630228157876, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8.891939414339048e-06, | |
| "loss": 0.06477681, | |
| "memory(GiB)": 75.3, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.22145817841813484, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8.844998688108535e-06, | |
| "loss": 0.06010489, | |
| "memory(GiB)": 75.3, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.003344 | |
| }, | |
| { | |
| "epoch": 0.22617005455469089, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 8.797213624001403e-06, | |
| "loss": 0.05960445, | |
| "memory(GiB)": 75.3, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.23088193069124696, | |
| "grad_norm": 1.0, | |
| "learning_rate": 8.748594715321512e-06, | |
| "loss": 0.06301316, | |
| "memory(GiB)": 75.3, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.235593806827803, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 8.699152638479817e-06, | |
| "loss": 0.06120233, | |
| "memory(GiB)": 75.3, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.24030568296435909, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8.6488982506499e-06, | |
| "loss": 0.06014684, | |
| "memory(GiB)": 75.3, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.24501755910091513, | |
| "grad_norm": 1.0, | |
| "learning_rate": 8.597842587383797e-06, | |
| "loss": 0.05922247, | |
| "memory(GiB)": 75.3, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.24972943523747118, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8.545996860188668e-06, | |
| "loss": 0.05851297, | |
| "memory(GiB)": 75.3, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.25444131137402726, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 8.493372454064809e-06, | |
| "loss": 0.05934198, | |
| "memory(GiB)": 75.3, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.003345 | |
| }, | |
| { | |
| "epoch": 0.2591531875105833, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 8.439980925005587e-06, | |
| "loss": 0.06134464, | |
| "memory(GiB)": 75.3, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.26386506364713935, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 8.385833997459804e-06, | |
| "loss": 0.05825667, | |
| "memory(GiB)": 75.3, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.26857693978369546, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 8.330943561757092e-06, | |
| "loss": 0.06092241, | |
| "memory(GiB)": 75.3, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.2732888159202515, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 8.275321671496862e-06, | |
| "loss": 0.05940055, | |
| "memory(GiB)": 75.3, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.27800069205680755, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 8.218980540901417e-06, | |
| "loss": 0.05920713, | |
| "memory(GiB)": 75.3, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.2827125681933636, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8.16193254213377e-06, | |
| "loss": 0.05777416, | |
| "memory(GiB)": 75.3, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.2874244443299197, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 8.104190202580811e-06, | |
| "loss": 0.05302551, | |
| "memory(GiB)": 75.3, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.29213632046647575, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 8.045766202102358e-06, | |
| "loss": 0.05804279, | |
| "memory(GiB)": 75.3, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.2968481966030318, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 7.986673370246743e-06, | |
| "loss": 0.05822692, | |
| "memory(GiB)": 75.3, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.30156007273958785, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 7.926924683433523e-06, | |
| "loss": 0.06007032, | |
| "memory(GiB)": 75.3, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.3062719488761439, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 7.866533262103937e-06, | |
| "loss": 0.06018423, | |
| "memory(GiB)": 75.3, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.3109838250127, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 7.805512367839742e-06, | |
| "loss": 0.05931915, | |
| "memory(GiB)": 75.3, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.31569570114925605, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 7.743875400451047e-06, | |
| "loss": 0.0566447, | |
| "memory(GiB)": 75.3, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.3204075772858121, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 7.681635895033798e-06, | |
| "loss": 0.05161901, | |
| "memory(GiB)": 75.3, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.32511945342236814, | |
| "grad_norm": 1.0, | |
| "learning_rate": 7.6188075189975644e-06, | |
| "loss": 0.05694907, | |
| "memory(GiB)": 75.3, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.003346 | |
| }, | |
| { | |
| "epoch": 0.32983132955892425, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 7.555404069064245e-06, | |
| "loss": 0.05555046, | |
| "memory(GiB)": 75.3, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.3345432056954803, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 7.491439468238404e-06, | |
| "loss": 0.05587023, | |
| "memory(GiB)": 75.3, | |
| "step": 355, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.33925508183203634, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 7.426927762749867e-06, | |
| "loss": 0.05913154, | |
| "memory(GiB)": 75.3, | |
| "step": 360, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.3439669579685924, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 7.361883118969248e-06, | |
| "loss": 0.05830712, | |
| "memory(GiB)": 75.3, | |
| "step": 365, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.34867883410514844, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 7.2963198202971055e-06, | |
| "loss": 0.05937972, | |
| "memory(GiB)": 75.3, | |
| "step": 370, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.35339071024170454, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 7.230252264027398e-06, | |
| "loss": 0.0565136, | |
| "memory(GiB)": 75.3, | |
| "step": 375, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.3581025863782606, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 7.163694958185928e-06, | |
| "loss": 0.05636386, | |
| "memory(GiB)": 75.3, | |
| "step": 380, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.36281446251481664, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 7.09666251834447e-06, | |
| "loss": 0.06038175, | |
| "memory(GiB)": 75.3, | |
| "step": 385, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.3675263386513727, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 7.0291696644112705e-06, | |
| "loss": 0.05833557, | |
| "memory(GiB)": 75.3, | |
| "step": 390, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.3722382147879288, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 6.9612312173986675e-06, | |
| "loss": 0.05632974, | |
| "memory(GiB)": 75.3, | |
| "step": 395, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.37695009092448484, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 6.892862096168469e-06, | |
| "loss": 0.05656151, | |
| "memory(GiB)": 75.3, | |
| "step": 400, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.3816619670610409, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 6.824077314155877e-06, | |
| "loss": 0.05432441, | |
| "memory(GiB)": 75.3, | |
| "step": 405, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.38637384319759693, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 6.75489197607262e-06, | |
| "loss": 0.05709869, | |
| "memory(GiB)": 75.3, | |
| "step": 410, | |
| "train_speed(iter/s)": 0.003347 | |
| }, | |
| { | |
| "epoch": 0.391085719334153, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 6.6853212745900585e-06, | |
| "loss": 0.05979726, | |
| "memory(GiB)": 75.3, | |
| "step": 415, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.3957975954707091, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 6.615380487002969e-06, | |
| "loss": 0.0600209, | |
| "memory(GiB)": 75.3, | |
| "step": 420, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.40050947160726513, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.0563777, | |
| "memory(GiB)": 75.3, | |
| "step": 425, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4052213477438212, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 6.474450165664722e-06, | |
| "loss": 0.05698464, | |
| "memory(GiB)": 75.3, | |
| "step": 430, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.40993322388037723, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 6.4034915793385e-06, | |
| "loss": 0.05311573, | |
| "memory(GiB)": 75.3, | |
| "step": 435, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.41464510001693333, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 6.332224794961752e-06, | |
| "loss": 0.05458606, | |
| "memory(GiB)": 75.3, | |
| "step": 440, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4193569761534894, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 6.260665462278544e-06, | |
| "loss": 0.05579169, | |
| "memory(GiB)": 75.3, | |
| "step": 445, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.42406885229004543, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 6.18882929527473e-06, | |
| "loss": 0.06002288, | |
| "memory(GiB)": 75.3, | |
| "step": 450, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4287807284266015, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 6.116732068727271e-06, | |
| "loss": 0.05494517, | |
| "memory(GiB)": 75.3, | |
| "step": 455, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.4334926045631575, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 6.0443896147401856e-06, | |
| "loss": 0.0547879, | |
| "memory(GiB)": 75.3, | |
| "step": 460, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.4382044806997136, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 5.971817819267914e-06, | |
| "loss": 0.05363967, | |
| "memory(GiB)": 75.3, | |
| "step": 465, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4429163568362697, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 5.8990326186268655e-06, | |
| "loss": 0.056594, | |
| "memory(GiB)": 75.3, | |
| "step": 470, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.4476282329728257, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 5.826049995995905e-06, | |
| "loss": 0.05898719, | |
| "memory(GiB)": 75.3, | |
| "step": 475, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.45234010910938177, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 5.752885977906539e-06, | |
| "loss": 0.05439388, | |
| "memory(GiB)": 75.3, | |
| "step": 480, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.4570519852459379, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 5.679556630723592e-06, | |
| "loss": 0.05334362, | |
| "memory(GiB)": 75.3, | |
| "step": 485, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.4617638613824939, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 5.606078057117136e-06, | |
| "loss": 0.06019425, | |
| "memory(GiB)": 75.3, | |
| "step": 490, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.46647573751904997, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 5.532466392526439e-06, | |
| "loss": 0.05597678, | |
| "memory(GiB)": 75.3, | |
| "step": 495, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.471187613655606, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 5.458737801616721e-06, | |
| "loss": 0.05094014, | |
| "memory(GiB)": 75.3, | |
| "step": 500, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.47589948979216207, | |
| "grad_norm": 0.875, | |
| "learning_rate": 5.384908474729501e-06, | |
| "loss": 0.0548723, | |
| "memory(GiB)": 75.3, | |
| "step": 505, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.48061136592871817, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 5.310994624327292e-06, | |
| "loss": 0.05574841, | |
| "memory(GiB)": 75.3, | |
| "step": 510, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4853232420652742, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 5.23701248143345e-06, | |
| "loss": 0.05651059, | |
| "memory(GiB)": 75.3, | |
| "step": 515, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.49003511820183027, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 5.162978292067933e-06, | |
| "loss": 0.05878415, | |
| "memory(GiB)": 75.3, | |
| "step": 520, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.4947469943383863, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 5.088908313679788e-06, | |
| "loss": 0.05620171, | |
| "memory(GiB)": 75.3, | |
| "step": 525, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.49945887047494236, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 5.014818811577104e-06, | |
| "loss": 0.05407885, | |
| "memory(GiB)": 75.3, | |
| "step": 530, | |
| "train_speed(iter/s)": 0.003348 | |
| }, | |
| { | |
| "epoch": 0.5041707466114984, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.940726055355259e-06, | |
| "loss": 0.05323058, | |
| "memory(GiB)": 75.3, | |
| "step": 535, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5088826227480545, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.866646315324217e-06, | |
| "loss": 0.05346375, | |
| "memory(GiB)": 75.3, | |
| "step": 540, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5135944988846106, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.792595858935668e-06, | |
| "loss": 0.05774211, | |
| "memory(GiB)": 75.3, | |
| "step": 545, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5183063750211666, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 4.718590947210788e-06, | |
| "loss": 0.05547717, | |
| "memory(GiB)": 75.3, | |
| "step": 550, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5230182511577227, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.644647831169435e-06, | |
| "loss": 0.05536319, | |
| "memory(GiB)": 75.3, | |
| "step": 555, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5277301272942787, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.570782748261516e-06, | |
| "loss": 0.05369086, | |
| "memory(GiB)": 75.3, | |
| "step": 560, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.5324420034308348, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.497011918801347e-06, | |
| "loss": 0.05471834, | |
| "memory(GiB)": 75.3, | |
| "step": 565, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5371538795673909, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 4.423351542405764e-06, | |
| "loss": 0.05114409, | |
| "memory(GiB)": 75.3, | |
| "step": 570, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5418657557039469, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 4.349817794436805e-06, | |
| "loss": 0.05673685, | |
| "memory(GiB)": 75.3, | |
| "step": 575, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.546577631840503, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 4.276426822449682e-06, | |
| "loss": 0.05527523, | |
| "memory(GiB)": 75.3, | |
| "step": 580, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.551289507977059, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.203194742646893e-06, | |
| "loss": 0.05317973, | |
| "memory(GiB)": 75.3, | |
| "step": 585, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5560013841136151, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.130137636339191e-06, | |
| "loss": 0.05449303, | |
| "memory(GiB)": 75.3, | |
| "step": 590, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5607132602501712, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 4.057271546414242e-06, | |
| "loss": 0.05341119, | |
| "memory(GiB)": 75.3, | |
| "step": 595, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5654251363867272, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.984612473813689e-06, | |
| "loss": 0.05254069, | |
| "memory(GiB)": 75.3, | |
| "step": 600, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5701370125232833, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.912176374019462e-06, | |
| "loss": 0.05324795, | |
| "memory(GiB)": 75.3, | |
| "step": 605, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5748488886598394, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.839979153550039e-06, | |
| "loss": 0.05177047, | |
| "memory(GiB)": 75.3, | |
| "step": 610, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5795607647963954, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.768036666467486e-06, | |
| "loss": 0.05265539, | |
| "memory(GiB)": 75.3, | |
| "step": 615, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5842726409329515, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.6963647108959868e-06, | |
| "loss": 0.05418316, | |
| "memory(GiB)": 75.3, | |
| "step": 620, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5889845170695075, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.6249790255526916e-06, | |
| "loss": 0.05562772, | |
| "memory(GiB)": 75.3, | |
| "step": 625, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5936963932060636, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.553895286291577e-06, | |
| "loss": 0.05445199, | |
| "memory(GiB)": 75.3, | |
| "step": 630, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.5984082693426197, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.483129102661137e-06, | |
| "loss": 0.05333483, | |
| "memory(GiB)": 75.3, | |
| "step": 635, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6031201454791757, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.4126960144766107e-06, | |
| "loss": 0.05417204, | |
| "memory(GiB)": 75.3, | |
| "step": 640, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6078320216157318, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.3426114884075488e-06, | |
| "loss": 0.05412987, | |
| "memory(GiB)": 75.3, | |
| "step": 645, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6125438977522878, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.272890914581417e-06, | |
| "loss": 0.05388454, | |
| "memory(GiB)": 75.3, | |
| "step": 650, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.6172557738888439, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.2035496032040303e-06, | |
| "loss": 0.05097753, | |
| "memory(GiB)": 75.3, | |
| "step": 655, | |
| "train_speed(iter/s)": 0.003349 | |
| }, | |
| { | |
| "epoch": 0.6219676500254, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.134602781197515e-06, | |
| "loss": 0.05341196, | |
| "memory(GiB)": 75.3, | |
| "step": 660, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.626679526161956, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 3.0660655888565827e-06, | |
| "loss": 0.05016219, | |
| "memory(GiB)": 75.3, | |
| "step": 665, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6313914022985121, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.997953076523803e-06, | |
| "loss": 0.05216441, | |
| "memory(GiB)": 75.3, | |
| "step": 670, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6361032784350681, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.930280201284654e-06, | |
| "loss": 0.05449665, | |
| "memory(GiB)": 75.3, | |
| "step": 675, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6408151545716242, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.863061823683032e-06, | |
| "loss": 0.05129569, | |
| "memory(GiB)": 75.3, | |
| "step": 680, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6455270307081803, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.7963127044579697e-06, | |
| "loss": 0.05290835, | |
| "memory(GiB)": 75.3, | |
| "step": 685, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6502389068447363, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.7300475013022666e-06, | |
| "loss": 0.0528672, | |
| "memory(GiB)": 75.3, | |
| "step": 690, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6549507829812924, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.6642807656437565e-06, | |
| "loss": 0.05229232, | |
| "memory(GiB)": 75.3, | |
| "step": 695, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6596626591178485, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.599026939449899e-06, | |
| "loss": 0.05371115, | |
| "memory(GiB)": 75.3, | |
| "step": 700, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6643745352544045, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.534300352056416e-06, | |
| "loss": 0.05234203, | |
| "memory(GiB)": 75.3, | |
| "step": 705, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6690864113909606, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.470115217020654e-06, | |
| "loss": 0.05360326, | |
| "memory(GiB)": 75.3, | |
| "step": 710, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6737982875275166, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.4064856290003863e-06, | |
| "loss": 0.05475932, | |
| "memory(GiB)": 75.3, | |
| "step": 715, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6785101636640727, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 2.3434255606586925e-06, | |
| "loss": 0.05548735, | |
| "memory(GiB)": 75.3, | |
| "step": 720, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6832220398006288, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.2809488595956746e-06, | |
| "loss": 0.05201564, | |
| "memory(GiB)": 75.3, | |
| "step": 725, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.6879339159371848, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.219069245307589e-06, | |
| "loss": 0.05408272, | |
| "memory(GiB)": 75.3, | |
| "step": 730, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.6926457920737409, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 2.157800306174139e-06, | |
| "loss": 0.05537663, | |
| "memory(GiB)": 75.3, | |
| "step": 735, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.6973576682102969, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.0971554964745476e-06, | |
| "loss": 0.05455139, | |
| "memory(GiB)": 75.3, | |
| "step": 740, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.702069544346853, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.0371481334330913e-06, | |
| "loss": 0.05394316, | |
| "memory(GiB)": 75.3, | |
| "step": 745, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7067814204834091, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.9777913942946987e-06, | |
| "loss": 0.05269849, | |
| "memory(GiB)": 75.3, | |
| "step": 750, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7114932966199651, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.919098313431335e-06, | |
| "loss": 0.05057405, | |
| "memory(GiB)": 75.3, | |
| "step": 755, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7162051727565212, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.8610817794797164e-06, | |
| "loss": 0.05438253, | |
| "memory(GiB)": 75.3, | |
| "step": 760, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7209170488930772, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.8037545325110506e-06, | |
| "loss": 0.05222658, | |
| "memory(GiB)": 75.3, | |
| "step": 765, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7256289250296333, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.7471291612333997e-06, | |
| "loss": 0.05131737, | |
| "memory(GiB)": 75.3, | |
| "step": 770, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7303408011661894, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 1.6912181002272714e-06, | |
| "loss": 0.05391481, | |
| "memory(GiB)": 75.3, | |
| "step": 775, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7350526773027454, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.6360336272150684e-06, | |
| "loss": 0.05078862, | |
| "memory(GiB)": 75.3, | |
| "step": 780, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7397645534393015, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.581587860364977e-06, | |
| "loss": 0.05192038, | |
| "memory(GiB)": 75.3, | |
| "step": 785, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7444764295758576, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.52789275562988e-06, | |
| "loss": 0.05364103, | |
| "memory(GiB)": 75.3, | |
| "step": 790, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7491883057124136, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.4749601041219246e-06, | |
| "loss": 0.0536845, | |
| "memory(GiB)": 75.3, | |
| "step": 795, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7539001818489697, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.4228015295232484e-06, | |
| "loss": 0.05084696, | |
| "memory(GiB)": 75.3, | |
| "step": 800, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.7586120579855257, | |
| "grad_norm": 0.875, | |
| "learning_rate": 1.371428485533498e-06, | |
| "loss": 0.05773014, | |
| "memory(GiB)": 75.3, | |
| "step": 805, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7633239341220818, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.3208522533546748e-06, | |
| "loss": 0.05219783, | |
| "memory(GiB)": 75.3, | |
| "step": 810, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7680358102586379, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.2710839392138386e-06, | |
| "loss": 0.05375321, | |
| "memory(GiB)": 75.3, | |
| "step": 815, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7727476863951939, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.222134471924259e-06, | |
| "loss": 0.05204231, | |
| "memory(GiB)": 75.3, | |
| "step": 820, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.77745956253175, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.1740146004855141e-06, | |
| "loss": 0.0559127, | |
| "memory(GiB)": 75.3, | |
| "step": 825, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.782171438668306, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.1267348917230737e-06, | |
| "loss": 0.05298336, | |
| "memory(GiB)": 75.3, | |
| "step": 830, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7868833148048621, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.080305727967893e-06, | |
| "loss": 0.05347639, | |
| "memory(GiB)": 75.3, | |
| "step": 835, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7915951909414182, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.0347373047765202e-06, | |
| "loss": 0.05329442, | |
| "memory(GiB)": 75.3, | |
| "step": 840, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.7963070670779742, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 9.900396286922025e-07, | |
| "loss": 0.0537856, | |
| "memory(GiB)": 75.3, | |
| "step": 845, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.8010189432145303, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 9.462225150475296e-07, | |
| "loss": 0.05233877, | |
| "memory(GiB)": 75.3, | |
| "step": 850, | |
| "train_speed(iter/s)": 0.00335 | |
| }, | |
| { | |
| "epoch": 0.8057308193510863, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 9.032955858090319e-07, | |
| "loss": 0.0549244, | |
| "memory(GiB)": 75.3, | |
| "step": 855, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8104426954876424, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 8.612682674642647e-07, | |
| "loss": 0.04935811, | |
| "memory(GiB)": 75.3, | |
| "step": 860, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8151545716241985, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 8.201497889518073e-07, | |
| "loss": 0.05281691, | |
| "memory(GiB)": 75.3, | |
| "step": 865, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8198664477607545, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 7.799491796346487e-07, | |
| "loss": 0.05795277, | |
| "memory(GiB)": 75.3, | |
| "step": 870, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8245783238973106, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 7.406752673173851e-07, | |
| "loss": 0.05225162, | |
| "memory(GiB)": 75.3, | |
| "step": 875, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8292902000338667, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 7.023366763077044e-07, | |
| "loss": 0.0509973, | |
| "memory(GiB)": 75.3, | |
| "step": 880, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8340020761704227, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 6.649418255225298e-07, | |
| "loss": 0.05142277, | |
| "memory(GiB)": 75.3, | |
| "step": 885, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8387139523069788, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 6.284989266392805e-07, | |
| "loss": 0.05023923, | |
| "memory(GiB)": 75.3, | |
| "step": 890, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8434258284435348, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 5.930159822926407e-07, | |
| "loss": 0.0534648, | |
| "memory(GiB)": 75.3, | |
| "step": 895, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8481377045800909, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 5.585007843172286e-07, | |
| "loss": 0.05155768, | |
| "memory(GiB)": 75.3, | |
| "step": 900, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.852849580716647, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 5.249609120365579e-07, | |
| "loss": 0.05368913, | |
| "memory(GiB)": 75.3, | |
| "step": 905, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.857561456853203, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.924037305986696e-07, | |
| "loss": 0.05452033, | |
| "memory(GiB)": 75.3, | |
| "step": 910, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8622733329897591, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.6083638935878025e-07, | |
| "loss": 0.05384221, | |
| "memory(GiB)": 75.3, | |
| "step": 915, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.866985209126315, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.302658203093418e-07, | |
| "loss": 0.05272598, | |
| "memory(GiB)": 75.3, | |
| "step": 920, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8716970852628712, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.00698736557808e-07, | |
| "loss": 0.05447989, | |
| "memory(GiB)": 75.3, | |
| "step": 925, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8764089613994273, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 3.721416308524839e-07, | |
| "loss": 0.05123619, | |
| "memory(GiB)": 75.3, | |
| "step": 930, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8811208375359832, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.4460077415675473e-07, | |
| "loss": 0.05347574, | |
| "memory(GiB)": 75.3, | |
| "step": 935, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8858327136725394, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.1808221427202636e-07, | |
| "loss": 0.05334803, | |
| "memory(GiB)": 75.3, | |
| "step": 940, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8905445898090953, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.925917745096568e-07, | |
| "loss": 0.05249671, | |
| "memory(GiB)": 75.3, | |
| "step": 945, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8952564659456514, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 2.681350524122045e-07, | |
| "loss": 0.05494893, | |
| "memory(GiB)": 75.3, | |
| "step": 950, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.8999683420822076, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.05149726, | |
| "memory(GiB)": 75.3, | |
| "step": 955, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.9046802182187635, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 2.2234401521297576e-07, | |
| "loss": 0.05425293, | |
| "memory(GiB)": 75.3, | |
| "step": 960, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.9093920943553196, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.01019755539108e-07, | |
| "loss": 0.0552171, | |
| "memory(GiB)": 75.3, | |
| "step": 965, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.9141039704918758, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.8074932217786445e-07, | |
| "loss": 0.05237709, | |
| "memory(GiB)": 75.3, | |
| "step": 970, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.9188158466284317, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.6153716639075223e-07, | |
| "loss": 0.05221198, | |
| "memory(GiB)": 75.3, | |
| "step": 975, | |
| "train_speed(iter/s)": 0.003351 | |
| }, | |
| { | |
| "epoch": 0.9235277227649878, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.433875070480878e-07, | |
| "loss": 0.05134506, | |
| "memory(GiB)": 75.3, | |
| "step": 980, | |
| "train_speed(iter/s)": 0.003352 | |
| }, | |
| { | |
| "epoch": 0.9282395989015438, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.2630432970255014e-07, | |
| "loss": 0.05436495, | |
| "memory(GiB)": 75.3, | |
| "step": 985, | |
| "train_speed(iter/s)": 0.003352 | |
| }, | |
| { | |
| "epoch": 0.9329514750380999, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1.1029138571398645e-07, | |
| "loss": 0.05440986, | |
| "memory(GiB)": 75.3, | |
| "step": 990, | |
| "train_speed(iter/s)": 0.003352 | |
| }, | |
| { | |
| "epoch": 0.937663351174656, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 9.535219142563168e-08, | |
| "loss": 0.05418127, | |
| "memory(GiB)": 75.3, | |
| "step": 995, | |
| "train_speed(iter/s)": 0.003352 | |
| }, | |
| { | |
| "epoch": 0.942375227311212, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 8.149002739194222e-08, | |
| "loss": 0.05519557, | |
| "memory(GiB)": 75.3, | |
| "step": 1000, | |
| "train_speed(iter/s)": 0.003352 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1061, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.440049406181114e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |